Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_final/grupo5/Trabajo_final_grupo5_python (1).ipynb
4681 views
Kernel: Python 3 (ipykernel)
# import libraries import pandas as pd import numpy as np import re from tqdm import tqdm # controlar el tiempo en un loop import os # linear model library import statsmodels.api as sm # linear regression utiliza todas las columnas de base de datos import statsmodels.formula.api as smf # linear regression usa uan formula from sklearn import datasets, linear_model # models from sklearn.metrics import mean_squared_error, r2_score from linearmodels.iv import IV2SLS # for IV regression import warnings warnings.filterwarnings('ignore') # eliminar warning messages from pandas import Series, DataFrame import matplotlib.pyplot as plt import geopandas as gpd # manejo de datos georefereciados from geopandas import GeoSeries # series de datos georerenciados from shapely.geometry import Point, LineString, Polygon, MultiLineString # objetos geométricos from shapely.ops import nearest_points # operaciones entre objetods geometricos import contextily as cx # Fondo Goole maps, fondo de mapa from pyproj import CRS, Geod # proyecciones a sistemas planares import matplotlib.patches as mpatches import haversine as hs # distancia de grat-cricle entre puntos from geopy import distance # distancia entre puntos from tqdm import tqdm # contador de tiempo en un loop from matplotlib.lines import Line2D import warnings warnings.filterwarnings('ignore') # eliminar warning messages # Export latex table from pystout import pystout user = os.getlogin() # Username os.chdir(f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2/Trabajo_final/datos") # Set directorio
#Instalamos librerias que no tenemos #!pip install pystout
# Question 1.1 - descriptive statistics of the variables of interest.

# Load the replication dataset (categoricals are kept as their raw codes).
repdata = pd.read_stata(r"../datos/mss_repdata.dta", convert_categoricals=False)
repdata  # notebook display

# Keep only the variables of interest.
table1 = repdata[
    [
        "NDVI_g",
        "tot_100",
        "trade_pGDP",
        "pop_den_rur",
        "land_crop",
        "va_agr",
        "va_ind_manf",
    ]
]
table1  # notebook display

# Basic descriptive statistics: one row per statistic.
stats_wanted = ["count", "mean", "std"]
summary_table = table1.describe().loc[stats_wanted]
summary_table  # notebook display

# Transpose so that each variable is a row, mirroring the published table.
summary_table = table1.describe().loc[stats_wanted].T
summary_table  # notebook display

# Human-readable labels, listed in the same order as table1's columns.
table1.columns  # notebook display
new_names = [
    "Tasa de var. del indice de vegetacion",
    "Terminos de intercambio",
    "Exportaciones respecto al PBI",
    "Densidad poblacional rural",
    "Porcentaje de tierra cultivable en uso",
    "V. A. del sector agriculta respecto PBI",
    "V. A. del sector manufacturero respecto PBI",
]
# Preview of the column-name -> label mapping (displayed, not stored).
{column: label for column, label in zip(table1.columns, new_names)}
{'NDVI_g': 'Tasa de var. del indice de vegetacion', 'tot_100': 'Terminos de intercambio', 'trade_pGDP': 'Exportaciones respecto al PBI', 'pop_den_rur': 'Densidad poblacional rural', 'land_crop': 'Porcentaje de tierra cultivable en uso', 'va_agr': 'V. A. del sector agriculta respecto PBI', 'va_ind_manf': 'V. A. del sector manufacturero respecto PBI'}
# Relabel the summary table: rows get the descriptive variable labels and the
# statistic columns get their Spanish names.
index_nuevos_nombres = {
    column: label for column, label in zip(table1.columns, new_names)
}
columns_nuevos_nombres = dict(
    count="Observaciones",
    mean="Media",
    std="Desviación estándar",
)

# Rename rows (index) and columns in place.
summary_table.rename(
    index=index_nuevos_nombres,
    columns=columns_nuevos_nombres,
    inplace=True,
)

# Final table for item 1.
summary_table
# Export the summary table to LaTeX.
# float_format rounds every float cell to two decimals; this is the decimal
# formatting the earlier version of this cell could not get to work. Note that
# it also applies to the observation count, which describe() returns as a float.
summary_table.to_latex(
    "summary2.tex",
    caption="Descriptive Statistics",
    column_format="lccc",  # l: left-aligned row labels, c: centred statistics
    float_format="%.2f",
)
#Pregunta 1.2. Réplica de la tabla 3 (pag 737)
# Basic model: any_prio regressed on the two GPCP_g regressors.
regressors = ["GPCP_g", "GPCP_g_l"]
y = repdata["any_prio"]

# Design matrix: the two regressors plus an explicit constant column.
X = sm.add_constant(repdata[regressors])
X  # notebook display

# Fit by OLS (fit() runs the regression) and print the standard summary.
ols_model1 = sm.OLS(y, X).fit()
print(ols_model1.summary())
OLS Regression Results ============================================================================== Dep. Variable: any_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.000 Method: Least Squares F-statistic: 1.008 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.366 Time: 19:47:42 Log-Likelihood: -448.04 No. Observations: 743 AIC: 902.1 Df Residuals: 740 BIC: 915.9 Df Model: 2 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ const 0.2697 0.016 16.449 0.000 0.238 0.302 GPCP_g -0.0288 0.085 -0.339 0.735 -0.196 0.138 GPCP_g_l -0.1204 0.086 -1.397 0.163 -0.290 0.049 ============================================================================== Omnibus: 189.379 Durbin-Watson: 0.530 Prob(Omnibus): 0.000 Jarque-Bera (JB): 159.939 Skew: 1.044 Prob(JB): 1.86e-35 Kurtosis: 2.104 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Same specification, now with HC1 heteroscedasticity-robust standard errors.
robust_options = {"cov_type": "HC1"}
ols_model1_rb = sm.OLS(y, X).fit(**robust_options)
print(ols_model1_rb.summary())
OLS Regression Results ============================================================================== Dep. Variable: any_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.000 Method: Least Squares F-statistic: 1.014 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.363 Time: 19:47:43 Log-Likelihood: -448.04 No. Observations: 743 AIC: 902.1 Df Residuals: 740 BIC: 915.9 Df Model: 2 Covariance Type: HC1 ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ const 0.2697 0.016 16.374 0.000 0.237 0.302 GPCP_g -0.0288 0.090 -0.321 0.748 -0.205 0.147 GPCP_g_l -0.1204 0.087 -1.391 0.164 -0.290 0.049 ============================================================================== Omnibus: 189.379 Durbin-Watson: 0.530 Prob(Omnibus): 0.000 Jarque-Bera (JB): 159.939 Skew: 1.044 Prob(JB): 1.86e-35 Kurtosis: 2.104 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors are heteroscedasticity robust (HC1)
# Second robust fit (Huber-White, HC1), kept under its own name.
ols_model1_rb1 = sm.OLS(endog=y, exog=X).fit(cov_type="HC1")
print(ols_model1_rb1.summary())
OLS Regression Results ============================================================================== Dep. Variable: any_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.000 Method: Least Squares F-statistic: 1.014 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.363 Time: 19:47:44 Log-Likelihood: -448.04 No. Observations: 743 AIC: 902.1 Df Residuals: 740 BIC: 915.9 Df Model: 2 Covariance Type: HC1 ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ const 0.2697 0.016 16.374 0.000 0.237 0.302 GPCP_g -0.0288 0.090 -0.321 0.748 -0.205 0.147 GPCP_g_l -0.1204 0.087 -1.391 0.164 -0.290 0.049 ============================================================================== Omnibus: 189.379 Durbin-Watson: 0.530 Prob(Omnibus): 0.000 Jarque-Bera (JB): 159.939 Skew: 1.044 Prob(JB): 1.86e-35 Kurtosis: 2.104 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors are heteroscedasticity robust (HC1)
# Acceder a la información de la tabla ols_model1_rb.summary2() ols_model1_rb.summary2().tables[1]
dir(sm.OLS(y, X)) # Lista de atributos y métodos
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_check_kwargs', '_data_attr', '_df_model', '_df_resid', '_fit_collinear', '_fit_ridge', '_fit_zeros', '_formula_max_endog', '_get_init_kwds', '_handle_data', '_init_keys', '_kwargs_allowed', '_setup_score_hess', '_sqrt_lasso', 'data', 'df_model', 'df_resid', 'endog', 'endog_names', 'exog', 'exog_names', 'fit', 'fit_regularized', 'from_formula', 'get_distribution', 'hessian', 'hessian_factor', 'information', 'initialize', 'k_constant', 'loglike', 'nobs', 'predict', 'rank', 'score', 'weights', 'wendog', 'wexog', 'whiten']
# Y estimados a partir del método predict sm.OLS(y, X).fit().predict()
array([0.28496978, 0.2748217 , 0.25195793, 0.27214113, 0.26302426, 0.26568972, 0.2466797 , 0.27791809, 0.25526007, 0.28224195, 0.26917588, 0.30081865, 0.28540232, 0.21866934, 0.2861791 , 0.27253843, 0.27999266, 0.27934855, 0.26917232, 0.28043463, 0.27237109, 0.26828123, 0.27929381, 0.25381756, 0.26769429, 0.27252959, 0.27744108, 0.24540483, 0.2622242 , 0.28374367, 0.22578955, 0.28753694, 0.26812988, 0.25210689, 0.2885355 , 0.26993623, 0.26967236, 0.25559903, 0.24386254, 0.2739153 , 0.30170927, 0.26866805, 0.28991521, 0.24865613, 0.26155857, 0.27105444, 0.17496034, 0.29626138, 0.29942372, 0.23988976, 0.30644488, 0.18201347, 0.27697215, 0.25379256, 0.24793872, 0.26968144, 0.29971203, 0.27894354, 0.27432929, 0.27281134, 0.28310697, 0.25005254, 0.24888163, 0.27129552, 0.27677362, 0.2524269 , 0.27155199, 0.28413026, 0.2312682 , 0.28199823, 0.27089332, 0.2498764 , 0.28802241, 0.26710212, 0.26587571, 0.26061178, 0.2816283 , 0.25395249, 0.24080911, 0.29886544, 0.26830424, 0.24838017, 0.28130712, 0.26336496, 0.25129281, 0.27310025, 0.28621181, 0.27746349, 0.28334718, 0.287771 , 0.26165149, 0.26820117, 0.26669159, 0.26500867, 0.28311481, 0.26802954, 0.27617081, 0.26463664, 0.2899249 , 0.25148575, 0.26197403, 0.2830811 , 0.27255562, 0.25724725, 0.27454112, 0.25363973, 0.27860369, 0.26622255, 0.26348373, 0.27386322, 0.2701804 , 0.26925542, 0.27306353, 0.27548579, 0.2667854 , 0.26456781, 0.26831091, 0.28676397, 0.27466626, 0.24652724, 0.27373615, 0.26641863, 0.25932942, 0.26635622, 0.26933033, 0.27183976, 0.27969916, 0.27150216, 0.26157324, 0.27072618, 0.27180415, 0.27647364, 0.25294516, 0.24895213, 0.2898935 , 0.28356814, 0.28855814, 0.28652282, 0.23272215, 0.28414438, 0.26690943, 0.21789304, 0.30029623, 0.29493282, 0.21974803, 0.26397938, 0.27688917, 0.19916378, 0.29395434, 0.27651376, 0.27886489, 0.24405499, 0.23491972, 0.26913932, 0.2614215 , 0.28823138, 0.23593892, 0.26834563, 0.28889719, 0.26150323, 0.25476589, 0.28021396, 0.2785566 , 0.28739284, 0.25638399, 0.2685143 , 
0.26601195, 0.266387 , 0.28047375, 0.2795782 , 0.24956602, 0.27618855, 0.21320236, 0.28918143, 0.28556657, 0.29926377, 0.23754775, 0.27906998, 0.26678421, 0.24210984, 0.26576587, 0.31459122, 0.27405136, 0.24264627, 0.277868 , 0.29356047, 0.28646617, 0.23867612, 0.27209826, 0.25694241, 0.2604217 , 0.27243465, 0.28738483, 0.27930561, 0.25147996, 0.24456337, 0.28121169, 0.26633678, 0.23019748, 0.29172668, 0.28749903, 0.24031677, 0.27051027, 0.26277372, 0.28804177, 0.2257001 , 0.27398469, 0.27986473, 0.27040816, 0.25399174, 0.28285273, 0.28265373, 0.28293827, 0.25610697, 0.26501434, 0.27097668, 0.26839272, 0.27354068, 0.27955355, 0.24731268, 0.28533431, 0.24160057, 0.28978968, 0.29282641, 0.24800285, 0.24949416, 0.23826094, 0.26310096, 0.25438468, 0.28716589, 0.29769171, 0.26526959, 0.26059674, 0.25591369, 0.25387336, 0.29156528, 0.26901609, 0.27783372, 0.26318861, 0.28377692, 0.28088419, 0.28861658, 0.2754023 , 0.21938379, 0.26384442, 0.27896696, 0.25244708, 0.28188856, 0.25745628, 0.28301216, 0.24279603, 0.30309932, 0.26144509, 0.27874158, 0.25212564, 0.27821526, 0.27051771, 0.2818894 , 0.27417885, 0.25867843, 0.29059362, 0.29452066, 0.26426166, 0.25535766, 0.24858263, 0.26895224, 0.26055379, 0.26625866, 0.27383444, 0.26121317, 0.27357787, 0.27429979, 0.25721523, 0.29231413, 0.27848952, 0.27543343, 0.26381051, 0.27067454, 0.24849388, 0.27312393, 0.29277768, 0.25732122, 0.26590736, 0.25722482, 0.24220079, 0.25835867, 0.27736391, 0.29110722, 0.25071988, 0.27249154, 0.24667352, 0.28453534, 0.26808795, 0.26916287, 0.27444919, 0.27196229, 0.28122464, 0.2752372 , 0.2750082 , 0.28815601, 0.22531638, 0.27212014, 0.27465749, 0.26100753, 0.27865461, 0.260037 , 0.28519386, 0.26132007, 0.2827029 , 0.2640916 , 0.26392098, 0.26866438, 0.28346345, 0.27598611, 0.26640891, 0.29957058, 0.22487442, 0.24185624, 0.3151507 , 0.26392006, 0.25288431, 0.2861609 , 0.28180334, 0.24162488, 0.26748908, 0.2813641 , 0.29542743, 0.26492747, 0.27644114, 0.24360548, 0.28417196, 0.26034035, 
0.17026035, 0.30999498, 0.27696997, 0.23869529, 0.29867055, 0.25692226, 0.27413664, 0.25267573, 0.25573401, 0.24826106, 0.26042945, 0.29312151, 0.29211235, 0.25152075, 0.29296429, 0.22099036, 0.28299408, 0.2356418 , 0.24959036, 0.28636198, 0.27331616, 0.28060294, 0.26493497, 0.28194248, 0.29095738, 0.24315764, 0.27541201, 0.25938504, 0.26466378, 0.26737449, 0.26595197, 0.29768923, 0.26383165, 0.25954319, 0.25987779, 0.28399343, 0.26380928, 0.28600594, 0.25770208, 0.28576176, 0.25631652, 0.254864 , 0.29963553, 0.25451147, 0.27546931, 0.2578158 , 0.26132511, 0.26856981, 0.27780048, 0.27176283, 0.27174748, 0.27706396, 0.29420754, 0.23583807, 0.28863299, 0.24272018, 0.26333879, 0.28775854, 0.29702876, 0.22687502, 0.24580235, 0.30930804, 0.25007445, 0.30066286, 0.22727061, 0.29096045, 0.25035398, 0.25630239, 0.26997338, 0.28689748, 0.26815232, 0.28173516, 0.29287411, 0.27978217, 0.26405809, 0.24444535, 0.26822766, 0.27884218, 0.22996273, 0.28585741, 0.28870403, 0.24169328, 0.27014081, 0.26660939, 0.22146699, 0.30172142, 0.27983771, 0.26684136, 0.23897902, 0.27601925, 0.26925608, 0.3101956 , 0.27663425, 0.25205277, 0.2547879 , 0.27424848, 0.28229888, 0.223476 , 0.2662009 , 0.32331318, 0.26671921, 0.2668772 , 0.2074825 , 0.22536602, 0.28593027, 0.30477405, 0.26808999, 0.24947935, 0.25745349, 0.27321761, 0.2748773 , 0.27776485, 0.23928001, 0.2635465 , 0.28749362, 0.29472081, 0.21862439, 0.2676335 , 0.30080166, 0.26966232, 0.29306065, 0.23688644, 0.28930969, 0.24584524, 0.24023323, 0.26620497, 0.28865302, 0.25440343, 0.27250326, 0.28728198, 0.07583003, 0.31090373, 0.29627702, 0.236097 , 0.21528979, 0.31659791, 0.26719918, 0.29998336, 0.27639208, 0.29233801, 0.27626678, 0.23871984, 0.29270098, 0.28971898, 0.19109602, 0.29560864, 0.29507539, 0.20492604, 0.2788876 , 0.25891261, 0.17444965, 0.31339199, 0.27884491, 0.26400569, 0.22305778, 0.27565457, 0.28403302, 0.27106399, 0.28610541, 0.25836077, 0.25912874, 0.27306704, 0.27233956, 0.24449237, 0.26971896, 0.27261432, 
0.26493679, 0.27394499, 0.26721449, 0.25684216, 0.27022947, 0.27542375, 0.26995902, 0.27579783, 0.28002371, 0.25709103, 0.24234488, 0.296826 , 0.26192363, 0.25365127, 0.2864295 , 0.24891512, 0.24938813, 0.28482997, 0.27828059, 0.28726476, 0.28035497, 0.27157612, 0.25801002, 0.26678263, 0.26853841, 0.27516955, 0.27874475, 0.28386285, 0.24604537, 0.28752855, 0.29526714, 0.24616023, 0.2486039 , 0.24670796, 0.26244128, 0.25179474, 0.28499766, 0.3037 , 0.26479621, 0.26168092, 0.25250814, 0.25671517, 0.28608606, 0.27361334, 0.27955091, 0.25924962, 0.28006212, 0.2549075 , 0.28531706, 0.2908116 , 0.26898266, 0.25820194, 0.24600473, 0.27005837, 0.27632638, 0.26294027, 0.27953282, 0.26117204, 0.26488158, 0.27616653, 0.27326821, 0.28086191, 0.28593079, 0.28560771, 0.26892224, 0.28822231, 0.16313024, 0.25497695, 0.32293383, 0.21249017, 0.27262115, 0.30609214, 0.16679669, 0.27783477, 0.23734724, 0.29606521, 0.26716866, 0.2454338 , 0.30372741, 0.27167846, 0.26917541, 0.24335986, 0.28579254, 0.25898906, 0.24126848, 0.29032779, 0.29051066, 0.24820739, 0.29564777, 0.22738325, 0.27717002, 0.22919359, 0.25948569, 0.28919155, 0.28430183, 0.26479902, 0.27528442, 0.2899312 , 0.28356204, 0.28107796, 0.21581487, 0.27031131, 0.25826531, 0.24656602, 0.28470031, 0.29621292, 0.25227005, 0.26035504, 0.26259964, 0.26482224, 0.26159492, 0.25209917, 0.3018435 , 0.25325603, 0.25525354, 0.30793087, 0.21737779, 0.2329495 , 0.31341325, 0.28866529, 0.23647127, 0.27112947, 0.27523367, 0.29539826, 0.28145113, 0.28730419, 0.18898682, 0.28578298, 0.22418157, 0.25214535, 0.28336495, 0.28970836, 0.29180466, 0.27580531, 0.2309655 , 0.28955376, 0.25599223, 0.27000073, 0.28290819, 0.28032227, 0.25336004, 0.25340982, 0.29365966, 0.27637294, 0.28032937, 0.28603889, 0.26034879, 0.26022068, 0.27697228, 0.24887283, 0.28978698, 0.28490459, 0.28200161, 0.28765797, 0.26949008, 0.22787498, 0.27018998, 0.28271041, 0.25357493, 0.26073737, 0.27463583, 0.2798359 , 0.24296043, 0.30306325, 0.25136765, 0.26761196, 0.2565544 , 
0.28044399, 0.26945989, 0.28381678, 0.27537043, 0.24773015, 0.24674011, 0.30349319, 0.27601743, 0.22736574, 0.28421549, 0.25956987, 0.23834513, 0.2883796 , 0.26600035, 0.27435681, 0.28586025, 0.28055657, 0.25998459, 0.27036974, 0.25652407, 0.27700439, 0.28883738, 0.26650265, 0.26988379, 0.2485796 , 0.29300512, 0.26148703, 0.25306436, 0.27810453, 0.25796663, 0.25084136, 0.26911237, 0.28340842, 0.2867332 , 0.28721706, 0.26366116, 0.27082614, 0.26701527, 0.27578898, 0.28386679, 0.28645862, 0.27412183, 0.26185508, 0.28062735, 0.2602632 , 0.26023209, 0.26803586, 0.29506981, 0.24185199, 0.2570696 , 0.29029694, 0.27443058, 0.29234101, 0.22388564, 0.30235436, 0.26654653, 0.25558241, 0.27048435, 0.28396046, 0.25304765, 0.25309798, 0.31575402, 0.26484374, 0.24554492, 0.22660271, 0.28882671, 0.29258781, 0.22467728, 0.28253382, 0.28212012, 0.29394026, 0.27481921, 0.21302369, 0.30612175, 0.24754037, 0.20656903, 0.26960102, 0.28885259])
# Recordad métodos y atributos print(dir(ols_model1)) # predict para ello uso la función predict ols_model1.predict(X) # acceso a los parámetros ols_model1.params # R2 y R2 ajustado ols_model1.rsquared ols_model1.rsquared_adj
['HC0_se', 'HC1_se', 'HC2_se', 'HC3_se', '_HCCM', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abat_diagonal', '_cache', '_data_attr', '_data_in_cache', '_get_robustcov_results', '_is_nested', '_use_t', '_wexog_singular_values', 'aic', 'bic', 'bse', 'centered_tss', 'compare_f_test', 'compare_lm_test', 'compare_lr_test', 'condition_number', 'conf_int', 'conf_int_el', 'cov_HC0', 'cov_HC1', 'cov_HC2', 'cov_HC3', 'cov_kwds', 'cov_params', 'cov_type', 'df_model', 'df_resid', 'diagn', 'eigenvals', 'el_test', 'ess', 'f_pvalue', 'f_test', 'fittedvalues', 'fvalue', 'get_influence', 'get_prediction', 'get_robustcov_results', 'info_criteria', 'initialize', 'k_constant', 'llf', 'load', 'model', 'mse_model', 'mse_resid', 'mse_total', 'nobs', 'normalized_cov_params', 'outlier_test', 'params', 'predict', 'pvalues', 'remove_data', 'resid', 'resid_pearson', 'rsquared', 'rsquared_adj', 'save', 'scale', 'ssr', 'summary', 'summary2', 't_test', 't_test_pairwise', 'tvalues', 'uncentered_tss', 'use_t', 'wald_test', 'wald_test_terms', 'wresid']
2.0778261906828632e-05
# Same basic model, this time specified through the formula interface.
dependent = "any_prio"
covariates = " + ".join(["GPCP_g", "GPCP_g_l"])
control_formula = f"{dependent} ~ {covariates}"

ols_model1 = smf.ols(control_formula, data=repdata).fit()
print(ols_model1.summary())
OLS Regression Results ============================================================================== Dep. Variable: any_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.000 Method: Least Squares F-statistic: 1.008 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.366 Time: 19:47:51 Log-Likelihood: -448.04 No. Observations: 743 AIC: 902.1 Df Residuals: 740 BIC: 915.9 Df Model: 2 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.2697 0.016 16.449 0.000 0.238 0.302 GPCP_g -0.0288 0.085 -0.339 0.735 -0.196 0.138 GPCP_g_l -0.1204 0.086 -1.397 0.163 -0.290 0.049 ============================================================================== Omnibus: 189.379 Durbin-Watson: 0.530 Prob(Omnibus): 0.000 Jarque-Bera (JB): 159.939 Skew: 1.044 Prob(JB): 1.86e-35 Kurtosis: 2.104 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Fit the same regression with scikit-learn and inspect the fitted estimator.
ols_model1_skl = linear_model.LinearRegression().fit(X, y)

ols_model1_skl.coef_  # access to the coefficients
ols_model1_skl.predict(X)  # predictions, returned as an array
ols_model1_skl.score(X, y)  # R-squared
dir(ols_model1_skl)

# Root mean squared error of the scikit-learn fit.
# The predictions come from ols_model1_skl itself rather than from the
# statsmodels object `ols_model1`, so the figure stays correct even after
# `ols_model1` is rebound to a different specification in another cell.
mean_squared_error(y, ols_model1_skl.predict(X)) ** 0.5
0.4422283305322858
# Country fixed-effects model with country-specific time-trend regressors and
# standard errors clustered by country (ccode).

# Trend regressors are the columns whose name ends in "_time".
index_columns = np.where(repdata.columns.str.contains('_time$'))[0]  # positions of the matching columns
country_trend = repdata.columns[index_columns]  # names of the matching columns

# Build the right-hand side from a list of terms so the formula stays valid
# even when no trend column exists. Concatenating with a hard-coded ' + '
# left a dangling "+" in that case and patsy failed with
# "expected a noun, but instead the expression ended".
rhs_terms = ["GPCP_g", "GPCP_g_l", "C(ccode)", *country_trend]
if len(country_trend) == 0:
    # NOTE(review): without these columns the country-specific trends are
    # absent from the model - confirm the real column names in the dataset
    # and adjust the pattern above.
    print(
        "Warning: no columns ending in '_time' were found; "
        "fitting with country fixed effects only."
    )
formula_model1 = "any_prio ~ " + " + ".join(rhs_terms)

ols_model1 = smf.ols(formula_model1, data=repdata).fit(
    cov_type='cluster',
    cov_kwds={'groups': repdata['ccode']},
)
print(ols_model1.summary())

# RMSE against the outcome actually used in the fit (model.endog). This does
# not depend on whichever `y` is currently bound in the notebook, and it stays
# aligned with predict().
rmse_ol1 = round(
    mean_squared_error(ols_model1.model.endog, ols_model1.predict()) ** 0.5, 2
)
print(rmse_ol1)
--------------------------------------------------------------------------- PatsyError Traceback (most recent call last) Cell In [23], line 6 1 # In[88]: 4 formula_model1 = "any_prio ~ GPCP_g + GPCP_g_l + C(ccode)" + ' + ' + ' + '.join( country_trend ) ----> 6 ols_model1 = smf.ols(formula_model1, data=repdata).fit(cov_type='cluster', cov_kwds={'groups': repdata['ccode']}) 8 print(ols_model1.summary()) 10 rmse_ol1 = round(mean_squared_error( y, ols_model1.predict())**0.5,2)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\statsmodels\base\model.py:200, in Model.from_formula(cls, formula, data, subset, drop_cols, *args, **kwargs) 197 if missing == 'none': # with patsy it's drop or raise. let's raise. 198 missing = 'raise' --> 200 tmp = handle_formula_data(data, None, formula, depth=eval_env, 201 missing=missing) 202 ((endog, exog), missing_idx, design_info) = tmp 203 max_endog = cls._formula_max_endog
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\statsmodels\formula\formulatools.py:63, in handle_formula_data(Y, X, formula, depth, missing) 61 else: 62 if data_util._is_using_pandas(Y, None): ---> 63 result = dmatrices(formula, Y, depth, return_type='dataframe', 64 NA_action=na_action) 65 else: 66 result = dmatrices(formula, Y, depth, return_type='dataframe', 67 NA_action=na_action)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:309, in dmatrices(formula_like, data, eval_env, NA_action, return_type) 299 """Construct two design matrices given a formula_like and data. 300 301 This function is identical to :func:`dmatrix`, except that it requires (...) 306 See :func:`dmatrix` for details. 307 """ 308 eval_env = EvalEnvironment.capture(eval_env, reference=1) --> 309 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env, 310 NA_action, return_type) 311 if lhs.shape[1] == 0: 312 raise PatsyError("model is missing required outcome variables")
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:164, in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type) 162 def data_iter_maker(): 163 return iter([data]) --> 164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env, 165 NA_action) 166 if design_infos is not None: 167 return build_design_matrices(design_infos, data, 168 NA_action=NA_action, 169 return_type=return_type)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:62, in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action) 55 raise PatsyError( 56 "On Python 2, formula strings must be either 'str' objects, " 57 "or else 'unicode' objects containing only ascii " 58 "characters. You passed a unicode string with non-ascii " 59 "characters. I'm afraid you'll have to either switch to " 60 "ascii-only, or else upgrade to Python 3.") 61 if isinstance(formula_like, str): ---> 62 formula_like = ModelDesc.from_formula(formula_like) 63 # fallthrough 64 if isinstance(formula_like, ModelDesc):
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\desc.py:164, in ModelDesc.from_formula(cls, tree_or_string) 162 tree = tree_or_string 163 else: --> 164 tree = parse_formula(tree_or_string) 165 value = Evaluator().eval(tree, require_evalexpr=False) 166 assert isinstance(value, cls)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\parse_formula.py:146, in parse_formula(code, extra_operators) 144 operators = _default_ops + extra_operators 145 operator_strings = [op.token_type for op in operators] --> 146 tree = infix_parse(_tokenize_formula(code, operator_strings), 147 operators, 148 _atomic_token_types) 149 if not isinstance(tree, ParseNode) or tree.type != "~": 150 tree = ParseNode("~", None, [tree], tree.origin)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\infix_parser.py:221, in infix_parse(tokens, operators, atomic_types, trace) 218 print("End of token stream") 220 if want_noun: --> 221 raise PatsyError("expected a noun, but instead the expression ended", 222 c.op_stack[-1].token.origin) 224 while c.op_stack: 225 if c.op_stack[-1].op.token_type == Token.LPAREN:
PatsyError: expected a noun, but instead the expression ended any_prio ~ GPCP_g + GPCP_g_l + C(ccode) + ^
# Model 2: war_prio regressed on the same two GPCP_g regressors.
y = repdata["war_prio"]

# Design matrix with an explicit constant column.
X = sm.add_constant(repdata[["GPCP_g", "GPCP_g_l"]])

# Plain OLS fit (fit() runs the regression).
ols_model2 = sm.OLS(endog=y, exog=X).fit()
print(ols_model2.summary())

# Same fit with HC1 heteroscedasticity-robust standard errors.
ols_model2_rb = sm.OLS(endog=y, exog=X).fit(cov_type="HC1")
print(ols_model2_rb.summary())
OLS Regression Results ============================================================================== Dep. Variable: war_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.001 Method: Least Squares F-statistic: 1.200 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.302 Time: 19:44:08 Log-Likelihood: -320.10 No. Observations: 743 AIC: 646.2 Df Residuals: 740 BIC: 660.0 Df Model: 2 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ const 0.1697 0.014 12.292 0.000 0.143 0.197 GPCP_g -0.0977 0.072 -1.363 0.173 -0.238 0.043 GPCP_g_l -0.0891 0.073 -1.228 0.220 -0.232 0.053 ============================================================================== Omnibus: 216.896 Durbin-Watson: 0.482 Prob(Omnibus): 0.000 Jarque-Bera (JB): 434.329 Skew: 1.777 Prob(JB): 4.86e-95 Kurtosis: 4.181 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. OLS Regression Results ============================================================================== Dep. Variable: war_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.001 Method: Least Squares F-statistic: 1.507 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.222 Time: 19:44:08 Log-Likelihood: -320.10 No. 
Observations: 743 AIC: 646.2 Df Residuals: 740 BIC: 660.0 Df Model: 2 Covariance Type: HC1 ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ const 0.1697 0.014 12.142 0.000 0.142 0.197 GPCP_g -0.0977 0.066 -1.474 0.140 -0.228 0.032 GPCP_g_l -0.0891 0.063 -1.412 0.158 -0.213 0.035 ============================================================================== Omnibus: 216.896 Durbin-Watson: 0.482 Prob(Omnibus): 0.000 Jarque-Bera (JB): 434.329 Skew: 1.777 Prob(JB): 4.86e-95 Kurtosis: 4.181 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors are heteroscedasticity robust (HC1)
# Alternative robust standard errors: refit the same OLS specification
# with the HC1 (Huber-White) heteroscedasticity-robust covariance.
ols_model_rb2 = sm.OLS(endog=y, exog=X).fit(cov_type="HC1")
rb2_summary = ols_model_rb2.summary()
print(rb2_summary)
OLS Regression Results ============================================================================== Dep. Variable: war_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.001 Method: Least Squares F-statistic: 1.507 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.222 Time: 19:44:13 Log-Likelihood: -320.10 No. Observations: 743 AIC: 646.2 Df Residuals: 740 BIC: 660.0 Df Model: 2 Covariance Type: HC1 ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ const 0.1697 0.014 12.142 0.000 0.142 0.197 GPCP_g -0.0977 0.066 -1.474 0.140 -0.228 0.032 GPCP_g_l -0.0891 0.063 -1.412 0.158 -0.213 0.035 ============================================================================== Omnibus: 216.896 Durbin-Watson: 0.482 Prob(Omnibus): 0.000 Jarque-Bera (JB): 434.329 Skew: 1.777 Prob(JB): 4.86e-95 Kurtosis: 4.181 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors are heteroscedasticity robust (HC1)
# Access the data behind the summary table.
# summary2() is built once and reused; the original called it twice and
# discarded the first result (a bare expression that is not the last one
# in a cell is never displayed).
rb2_tables = ols_model_rb2.summary2().tables
# Second table of the summary (index 1) -- presumably the coefficient
# table; confirm against the displayed output.
rb2_tables[1]
# List the attributes and methods exposed by the (not yet fitted) OLS model.
ols_unfitted = sm.OLS(y, X)
dir(ols_unfitted)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_check_kwargs', '_data_attr', '_df_model', '_df_resid', '_fit_collinear', '_fit_ridge', '_fit_zeros', '_formula_max_endog', '_get_init_kwds', '_handle_data', '_init_keys', '_kwargs_allowed', '_setup_score_hess', '_sqrt_lasso', 'data', 'df_model', 'df_resid', 'endog', 'endog_names', 'exog', 'exog_names', 'fit', 'fit_regularized', 'from_formula', 'get_distribution', 'hessian', 'hessian_factor', 'information', 'initialize', 'k_constant', 'loglike', 'nobs', 'predict', 'rank', 'score', 'weights', 'wendog', 'wexog', 'whiten']
# Fitted values of y obtained through the predict method of the fitted model.
ols_fitted = sm.OLS(y, X).fit()
ols_fitted.predict()
array([0.18704291, 0.16156761, 0.1591636 , 0.16689364, 0.16611632, 0.15020309, 0.16052623, 0.16445212, 0.1678748 , 0.17493067, 0.18461317, 0.21096717, 0.14646171, 0.14220873, 0.18236471, 0.17711868, 0.18211489, 0.18210699, 0.14566715, 0.17998848, 0.16871667, 0.17702256, 0.16698367, 0.15666318, 0.1680248 , 0.17984615, 0.16204504, 0.1430644 , 0.1802974 , 0.14964679, 0.14789666, 0.18521695, 0.15446167, 0.16866698, 0.18319467, 0.17212033, 0.15998443, 0.16219266, 0.14809908, 0.19410448, 0.18898141, 0.18438161, 0.1734193 , 0.14512834, 0.17947203, 0.1080412 , 0.11027748, 0.21478754, 0.16391527, 0.1845413 , 0.13976727, 0.11104919, 0.16801464, 0.14509102, 0.14936252, 0.18708431, 0.19845518, 0.17957136, 0.17248373, 0.18268706, 0.1702437 , 0.14207546, 0.15359244, 0.1777945 , 0.16454007, 0.15436827, 0.186385 , 0.15436469, 0.14794209, 0.18320195, 0.15511713, 0.166916 , 0.18187508, 0.16644452, 0.16207225, 0.15871311, 0.17390356, 0.13542463, 0.16609041, 0.1940256 , 0.15327744, 0.16152694, 0.17701248, 0.15333377, 0.15588651, 0.18178009, 0.18544734, 0.18102702, 0.19238042, 0.17812424, 0.16287117, 0.16802763, 0.16162038, 0.17802261, 0.16563186, 0.17415717, 0.16759646, 0.18110427, 0.17467989, 0.14947877, 0.1714517 , 0.18359009, 0.16247644, 0.16628072, 0.16164332, 0.1636782 , 0.17512774, 0.16253175, 0.16759689, 0.17313232, 0.16962104, 0.16977008, 0.17919028, 0.15981144, 0.16498076, 0.16277749, 0.17780929, 0.18909981, 0.15799448, 0.15521208, 0.17201592, 0.16114667, 0.15991444, 0.16695022, 0.16935811, 0.17699674, 0.17946938, 0.16566712, 0.16434882, 0.17016698, 0.17781272, 0.16502038, 0.15318973, 0.16544964, 0.19148407, 0.18787251, 0.20031107, 0.15687086, 0.14979529, 0.18722649, 0.13143828, 0.14516687, 0.21543774, 0.15845881, 0.12520563, 0.18152704, 0.12671072, 0.13232035, 0.18971026, 0.18393917, 0.16208728, 0.14271711, 0.14576751, 0.16000941, 0.18016869, 0.16290845, 0.14075029, 0.18143644, 0.18127924, 0.1526682 , 0.1646786 , 0.1798863 , 0.18942011, 0.17439827, 0.15942248, 0.1672099 , 
0.16370274, 0.17189004, 0.18657038, 0.16575548, 0.14826875, 0.13594103, 0.13915283, 0.18840088, 0.20532517, 0.16983948, 0.15135654, 0.17843133, 0.1520727 , 0.14061413, 0.19234703, 0.21477279, 0.15514228, 0.15207831, 0.18710926, 0.20282314, 0.16165132, 0.14981917, 0.16472176, 0.15445135, 0.16228788, 0.18100661, 0.19059567, 0.16938383, 0.13874985, 0.15727079, 0.18267049, 0.13946622, 0.15154327, 0.19780696, 0.18044045, 0.15052487, 0.1614658 , 0.18290449, 0.15501597, 0.13844977, 0.17849413, 0.18040646, 0.15855663, 0.16498806, 0.18514041, 0.18950848, 0.17160514, 0.15637973, 0.16720823, 0.16970888, 0.16894229, 0.18159817, 0.16500872, 0.14367771, 0.16136921, 0.15740701, 0.20200474, 0.17496123, 0.14547437, 0.13512712, 0.14491251, 0.15345414, 0.16504363, 0.20073796, 0.18862624, 0.1619266 , 0.15739639, 0.14600772, 0.17220029, 0.18415935, 0.17434085, 0.17551435, 0.14825508, 0.18491573, 0.18730939, 0.1945623 , 0.14339232, 0.12653114, 0.17440473, 0.16309447, 0.16725978, 0.16760426, 0.17442337, 0.15696866, 0.17266447, 0.18704612, 0.17229268, 0.16373636, 0.1627915 , 0.17283451, 0.18546656, 0.14741111, 0.16374303, 0.1708573 , 0.20133781, 0.18597484, 0.15978412, 0.14547983, 0.15475955, 0.16405312, 0.15975333, 0.1710791 , 0.1670655 , 0.16449794, 0.17809304, 0.1619682 , 0.17368958, 0.19080806, 0.18097444, 0.16895817, 0.17033488, 0.15735606, 0.1521995 , 0.18861119, 0.17922437, 0.15908425, 0.16268817, 0.144761 , 0.14191308, 0.16210217, 0.19210516, 0.17207626, 0.16141921, 0.15482731, 0.16221672, 0.17979104, 0.16770973, 0.17097229, 0.17857576, 0.15472526, 0.18194385, 0.17270062, 0.1920322 , 0.15509649, 0.13716635, 0.17635244, 0.16594548, 0.17102634, 0.16738002, 0.17410421, 0.17367534, 0.17236854, 0.17645524, 0.16244065, 0.16286941, 0.17662267, 0.18396776, 0.17370455, 0.16101548, 0.16923238, 0.11190001, 0.17801593, 0.20261344, 0.15289799, 0.16481666, 0.19364133, 0.16133588, 0.1467095 , 0.17121662, 0.19580919, 0.18360753, 0.17473608, 0.15650069, 0.1570425 , 0.19069279, 0.09478098, 
0.11662815, 0.22025171, 0.15045632, 0.1672967 , 0.18178798, 0.16524338, 0.16360239, 0.15097496, 0.14773611, 0.14540589, 0.17326466, 0.20525889, 0.16924749, 0.17906121, 0.15295605, 0.14632107, 0.16156667, 0.12915442, 0.1654211 , 0.18161083, 0.18346956, 0.1738644 , 0.16963028, 0.1964992 , 0.16736664, 0.15501128, 0.16811332, 0.15880366, 0.16602904, 0.1613465 , 0.1844751 , 0.19097083, 0.1609754 , 0.1533545 , 0.17307076, 0.17343374, 0.17810026, 0.17139475, 0.17215015, 0.17654417, 0.14518327, 0.18024761, 0.180942 , 0.16359373, 0.167556 , 0.15599243, 0.16161041, 0.17353556, 0.17738658, 0.16938193, 0.18410675, 0.19667618, 0.16255364, 0.16037175, 0.1678773 , 0.14420727, 0.17099655, 0.20556127, 0.16805829, 0.11572445, 0.18159303, 0.1800722 , 0.18210499, 0.16185073, 0.15424136, 0.17488568, 0.14798629, 0.15497048, 0.19050428, 0.14164697, 0.17298674, 0.19172098, 0.19318605, 0.17714965, 0.15039347, 0.14711426, 0.18074995, 0.14971273, 0.14660418, 0.1978318 , 0.16626958, 0.1476653 , 0.17641938, 0.13227487, 0.15285324, 0.19920126, 0.1795625 , 0.14991386, 0.13920319, 0.16828129, 0.19342648, 0.20616132, 0.16583065, 0.14727692, 0.15805624, 0.18741266, 0.15220377, 0.12490523, 0.20183799, 0.20601325, 0.17341799, 0.13514278, 0.09431298, 0.14184593, 0.20374632, 0.1962324 , 0.16160206, 0.12980427, 0.16257447, 0.17328315, 0.18290921, 0.15760305, 0.14187327, 0.17081467, 0.20606595, 0.15731004, 0.12554287, 0.18902469, 0.18770634, 0.19027183, 0.1625395 , 0.16043646, 0.17314178, 0.13489106, 0.14129788, 0.18517464, 0.15821201, 0.15011158, 0.21350717, 0.05465512, 0.0476425 , 0.21957367, 0.17853394, 0.10087474, 0.16877245, 0.16398743, 0.18667498, 0.19340881, 0.18671646, 0.19575813, 0.15283282, 0.15527513, 0.21186909, 0.13221655, 0.12165224, 0.21494396, 0.14650931, 0.1253552 , 0.18558154, 0.0949594 , 0.12566243, 0.20696712, 0.17983861, 0.13608168, 0.13428499, 0.1836093 , 0.17832032, 0.18238577, 0.17625905, 0.15436404, 0.16265776, 0.1776055 , 0.15578372, 0.1503598 , 0.17238686, 0.16816736, 
0.16873778, 0.1731356 , 0.15981054, 0.15953081, 0.17398574, 0.17258451, 0.17598569, 0.16465887, 0.17446053, 0.13876987, 0.16702164, 0.18797323, 0.15064069, 0.17067385, 0.17244879, 0.13928198, 0.16346391, 0.18397256, 0.18568486, 0.18871859, 0.1804116 , 0.16400864, 0.15943395, 0.16632276, 0.17061527, 0.18057733, 0.17157088, 0.1635388 , 0.1588517 , 0.20205154, 0.17604352, 0.14225616, 0.13985979, 0.15100566, 0.15188 , 0.16080549, 0.20309118, 0.19244664, 0.16293896, 0.15537597, 0.14626345, 0.17022956, 0.18261636, 0.17942167, 0.17431904, 0.14521316, 0.1663386 , 0.16542766, 0.19448551, 0.18572249, 0.1653653 , 0.14639544, 0.15099438, 0.17556209, 0.16852706, 0.1719489 , 0.17247756, 0.15936575, 0.16994439, 0.17545043, 0.17752014, 0.18563946, 0.19237074, 0.17882417, 0.1778929 , 0.12045619, 0.07140599, 0.20074209, 0.17459829, 0.11976406, 0.21110232, 0.12880444, 0.10420644, 0.15247391, 0.15697276, 0.21193017, 0.14725273, 0.17320624, 0.19506753, 0.17539197, 0.15001762, 0.16083143, 0.17959224, 0.1415976 , 0.15742811, 0.20271294, 0.16581939, 0.17720555, 0.15955643, 0.1487012 , 0.15165633, 0.13070567, 0.172528 , 0.19210238, 0.18560423, 0.16678886, 0.18539161, 0.18971351, 0.19536676, 0.14363159, 0.13098983, 0.16664983, 0.14535142, 0.15745466, 0.19991597, 0.17942568, 0.15175086, 0.15889733, 0.16164316, 0.16464185, 0.1480495 , 0.17522556, 0.20085826, 0.14102674, 0.18947149, 0.17171226, 0.10198957, 0.16606492, 0.21912074, 0.16239543, 0.14603957, 0.17041403, 0.18964808, 0.1902954 , 0.20268902, 0.12760449, 0.12630954, 0.15566774, 0.12359586, 0.16186551, 0.1942139 , 0.17708252, 0.19659289, 0.14613178, 0.15557385, 0.17597748, 0.15817872, 0.17623212, 0.18785079, 0.17045474, 0.14378938, 0.17200213, 0.19065048, 0.17862203, 0.18883183, 0.17775083, 0.15466109, 0.17126419, 0.1582525 , 0.1692128 , 0.17515346, 0.18632771, 0.18868285, 0.18903063, 0.14350925, 0.13653112, 0.18037886, 0.1704386 , 0.15197 , 0.16331233, 0.18516054, 0.15446659, 0.17385424, 0.1824608 , 0.15719475, 0.15794858, 0.1675651 , 
0.17413515, 0.18333956, 0.15955835, 0.16448492, 0.13434514, 0.1714557 , 0.20562819, 0.14524184, 0.14775105, 0.17944519, 0.13930323, 0.1589058 , 0.18089702, 0.16790272, 0.18130923, 0.19010938, 0.17093998, 0.16493286, 0.1616061 , 0.16070747, 0.19104145, 0.1677181 , 0.17148525, 0.15273654, 0.1693474 , 0.18458853, 0.1514887 , 0.16379162, 0.1711832 , 0.1494963 , 0.15369807, 0.176016 , 0.18770166, 0.19434745, 0.17849891, 0.16640218, 0.16838233, 0.16935242, 0.18324291, 0.17970368, 0.18651227, 0.1660176 , 0.17194037, 0.17286607, 0.15804757, 0.15688771, 0.18808021, 0.17347543, 0.13777878, 0.1739309 , 0.18251431, 0.19563886, 0.15242018, 0.1564431 , 0.19382495, 0.15894914, 0.15679604, 0.18238104, 0.16721841, 0.13976083, 0.18681281, 0.2026636 , 0.1576839 , 0.12280629, 0.14468015, 0.20554201, 0.15639431, 0.14328271, 0.18410435, 0.19104338, 0.2008204 , 0.1317299 , 0.15190923, 0.19194582, 0.11405117, 0.11955249, 0.1834425 , 0.17662792])
# Recordad métodos y atributos print(dir(ols_model2)) # predict para ello uso la función predict ols_model2.predict(X) # acceso a los parámetros ols_model2.params # R2 y R2 ajustado ols_model2.rsquared ols_model2.rsquared_adj
['HC0_se', 'HC1_se', 'HC2_se', 'HC3_se', '_HCCM', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abat_diagonal', '_cache', '_data_attr', '_data_in_cache', '_get_robustcov_results', '_is_nested', '_use_t', '_wexog_singular_values', 'aic', 'bic', 'bse', 'centered_tss', 'compare_f_test', 'compare_lm_test', 'compare_lr_test', 'condition_number', 'conf_int', 'conf_int_el', 'cov_HC0', 'cov_HC1', 'cov_HC2', 'cov_HC3', 'cov_kwds', 'cov_params', 'cov_type', 'df_model', 'df_resid', 'diagn', 'eigenvals', 'el_test', 'ess', 'f_pvalue', 'f_test', 'fittedvalues', 'fvalue', 'get_influence', 'get_prediction', 'get_robustcov_results', 'info_criteria', 'initialize', 'k_constant', 'llf', 'load', 'model', 'mse_model', 'mse_resid', 'mse_total', 'nobs', 'normalized_cov_params', 'outlier_test', 'params', 'predict', 'pvalues', 'remove_data', 'resid', 'resid_pearson', 'rsquared', 'rsquared_adj', 'save', 'scale', 'ssr', 'summary', 'summary2', 't_test', 't_test_pairwise', 'tvalues', 'uncentered_tss', 'use_t', 'wald_test', 'wald_test_terms', 'wresid']
0.0005381642535065012
# Same regression written as a formula: dependent variable on the left of
# "~", regressors joined with " + " on the right.
regressors = ["GPCP_g", "GPCP_g_l"]
control_formula = "war_prio ~ " + " + ".join(regressors)

ols_model2 = smf.ols(formula=control_formula, data=repdata).fit()
print(ols_model2.summary())
OLS Regression Results ============================================================================== Dep. Variable: war_prio R-squared: 0.003 Model: OLS Adj. R-squared: 0.001 Method: Least Squares F-statistic: 1.200 Date: Sun, 11 Dec 2022 Prob (F-statistic): 0.302 Time: 19:44:32 Log-Likelihood: -320.10 No. Observations: 743 AIC: 646.2 Df Residuals: 740 BIC: 660.0 Df Model: 2 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ Intercept 0.1697 0.014 12.292 0.000 0.143 0.197 GPCP_g -0.0977 0.072 -1.363 0.173 -0.238 0.043 GPCP_g_l -0.0891 0.073 -1.228 0.220 -0.232 0.053 ============================================================================== Omnibus: 216.896 Durbin-Watson: 0.482 Prob(Omnibus): 0.000 Jarque-Bera (JB): 434.329 Skew: 1.777 Prob(JB): 4.86e-95 Kurtosis: 4.181 Cond. No. 6.26 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Same regression estimated with scikit-learn. The result of fit() is what
# gets bound to ols_model2_skl, exactly as in the one-line chained form.
skl_regressor = linear_model.LinearRegression()
ols_model2_skl = skl_regressor.fit(X, y)

ols_model2_skl.coef_         # access to the coefficients
ols_model2_skl.predict(X)    # predictions, returned as an array
ols_model2_skl.score(X, y)   # R-squared
dir(ols_model2_skl)

# Root mean square error, computed from the statsmodels fit (ols_model2);
# being the last expression, this is the value the notebook displays.
mean_squared_error(y, ols_model2.predict()) ** 0.5
0.37227542395527624
# Conflict regression with country fixed effects (C(ccode)) and the
# country-specific trend columns listed in country_trend, with standard
# errors clustered by country.
#
# The right-hand side is assembled from a list of terms so that an empty
# country_trend no longer leaves a dangling " + " at the end of the
# formula, which patsy rejects with "expected a noun, but instead the
# expression ended". With a non-empty country_trend the resulting string
# is identical to the previous concatenation.
rhs_terms = ["GPCP_g", "GPCP_g_l", "C(ccode)", *country_trend]
formula_model2 = "war_prio ~ " + " + ".join(rhs_terms)

ols_model2 = smf.ols(formula_model2, data=repdata).fit(
    cov_type="cluster",
    cov_kwds={"groups": repdata["ccode"]},
)
print(ols_model2.summary())

# Root mean square error of the fitted model, rounded to two decimals.
rmse_ol2 = round(mean_squared_error(y, ols_model2.predict()) ** 0.5, 2)
print(rmse_ol2)
--------------------------------------------------------------------------- PatsyError Traceback (most recent call last) Cell In [60], line 6 1 # In[135]: 4 formula_model2 = "war_prio ~ GPCP_g + GPCP_g_l + C(ccode)" + ' + ' + ' + '.join( country_trend ) ----> 6 ols_model2 = smf.ols(formula_model2, data=repdata).fit(cov_type='cluster', cov_kwds={'groups': repdata['ccode']}) 8 print(ols_model2.summary()) 10 rmse_ol2 = round(mean_squared_error( y, ols_model2.predict())**0.5,2)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\statsmodels\base\model.py:200, in Model.from_formula(cls, formula, data, subset, drop_cols, *args, **kwargs) 197 if missing == 'none': # with patsy it's drop or raise. let's raise. 198 missing = 'raise' --> 200 tmp = handle_formula_data(data, None, formula, depth=eval_env, 201 missing=missing) 202 ((endog, exog), missing_idx, design_info) = tmp 203 max_endog = cls._formula_max_endog
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\statsmodels\formula\formulatools.py:63, in handle_formula_data(Y, X, formula, depth, missing) 61 else: 62 if data_util._is_using_pandas(Y, None): ---> 63 result = dmatrices(formula, Y, depth, return_type='dataframe', 64 NA_action=na_action) 65 else: 66 result = dmatrices(formula, Y, depth, return_type='dataframe', 67 NA_action=na_action)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:309, in dmatrices(formula_like, data, eval_env, NA_action, return_type) 299 """Construct two design matrices given a formula_like and data. 300 301 This function is identical to :func:`dmatrix`, except that it requires (...) 306 See :func:`dmatrix` for details. 307 """ 308 eval_env = EvalEnvironment.capture(eval_env, reference=1) --> 309 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env, 310 NA_action, return_type) 311 if lhs.shape[1] == 0: 312 raise PatsyError("model is missing required outcome variables")
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:164, in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type) 162 def data_iter_maker(): 163 return iter([data]) --> 164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env, 165 NA_action) 166 if design_infos is not None: 167 return build_design_matrices(design_infos, data, 168 NA_action=NA_action, 169 return_type=return_type)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:62, in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action) 55 raise PatsyError( 56 "On Python 2, formula strings must be either 'str' objects, " 57 "or else 'unicode' objects containing only ascii " 58 "characters. You passed a unicode string with non-ascii " 59 "characters. I'm afraid you'll have to either switch to " 60 "ascii-only, or else upgrade to Python 3.") 61 if isinstance(formula_like, str): ---> 62 formula_like = ModelDesc.from_formula(formula_like) 63 # fallthrough 64 if isinstance(formula_like, ModelDesc):
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\desc.py:164, in ModelDesc.from_formula(cls, tree_or_string) 162 tree = tree_or_string 163 else: --> 164 tree = parse_formula(tree_or_string) 165 value = Evaluator().eval(tree, require_evalexpr=False) 166 assert isinstance(value, cls)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\parse_formula.py:146, in parse_formula(code, extra_operators) 144 operators = _default_ops + extra_operators 145 operator_strings = [op.token_type for op in operators] --> 146 tree = infix_parse(_tokenize_formula(code, operator_strings), 147 operators, 148 _atomic_token_types) 149 if not isinstance(tree, ParseNode) or tree.type != "~": 150 tree = ParseNode("~", None, [tree], tree.origin)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\infix_parser.py:221, in infix_parse(tokens, operators, atomic_types, trace) 218 print("End of token stream") 220 if want_noun: --> 221 raise PatsyError("expected a noun, but instead the expression ended", 222 c.op_stack[-1].token.origin) 224 while c.op_stack: 225 if c.op_stack[-1].op.token_type == Token.LPAREN:
PatsyError: expected a noun, but instead the expression ended war_prio ~ GPCP_g + GPCP_g_l + C(ccode) + ^
# Explanatory variables shown in the regression table, each mapped to the
# label printed for it.
labels = {
    "GPCP_g": "Growth in rainfall, t",
    "GPCP_g_l": "Growth in rainfall, t-1",
}

# Variable names and their labels as parallel lists, in table order.
explicativas = list(labels.keys())
etiquetas = list(labels.values())

labels
{'GPCP_g': 'Growth in rainfall, t', 'GPCP_g_l': 'Growth in rainfall, t-1'}
# Root mean square error of each model, computed from the model's own
# residuals so the table does not depend on RMSE variables that may never
# have been created (rmse_ol1 was undefined and raised a NameError).
rmse_ol1 = round(float((ols_model1.resid ** 2).mean() ** 0.5), 2)
rmse_ol2 = round(float((ols_model2.resid ** 2).mean() ** 0.5), 2)

# Export both regressions to a LaTeX table.
pystout(
    models=[ols_model1, ols_model2],
    file='regression_table.tex',
    digits=3,
    endog_names=['Civil Conflict 25 Deaths (OLS)', 'Civil Conflict 1,000 Deaths'],
    exogvars=explicativas,   # variables shown in the table
    varlabels=labels,        # labels for those variables
    # Group header spanning the two model columns (the table only has two).
    mgroups={'Ordinary Least Squares': [1, 2]},
    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    modstat={'nobs': 'Observations', 'rsquared': r'R\sym{2}'},
    # Extra rows appended below the coefficients.
    addrows={'Country fixed effects': ['yes', 'yes'],
             'Country-specific time trends': ['yes', 'yes'],
             'Root mean square error': [rmse_ol1, rmse_ol2]},
    addnotes=['Note.—Huber robust standard errors are in parentheses.',
              'Regression disturbance terms are clustered at the country level.',
              'A country-specific year time trend is included in all specifications (coefficient estimates not reported).',
              '* Significantly different from zero at 90 percent confidence.',
              '** Significantly different from zero at 95 percent confidence.',
              '*** Significantly different from zero at 99 percent confidence.'],
    title='Rainfall and Economic Growth',
    # Significance markers matching the notes above: 10 %, 5 % and 1 % levels.
    stars={.1: '*', .05: '**', .01: '***'},
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [62], line 12 1 # In[137]: 4 pystout(models = [ols_model1,ols_model2], file='regression_table.tex', digits=3, 5 endog_names=['Civil Conflict 25 Deaths (OLS)','Civil Conflict 1,000 Deaths'], 6 exogvars =explicativas , # sellecionamos las variables 7 varlabels = labels, # etiquetas a las variables 8 mgroups={'Ordinary Least Squares':[1,5]}, # titulo a las regresiones 9 modstat={'nobs':'Observarions','rsquared':'R\sym{2}'}, # estadísticos 10 addrows={'Country fixed effects':['yes','yes'], 'Country-specific time trends' : 11 ['yes','yes'], ---> 12 'Root mean square error': [rmse_ol1,rmse_ol2]}, # añadimos filas 13 addnotes=['Note.—Huber robust standard errors are in parentheses.', 14 'Regression disturbance terms are clustered at the country level.', 15 'A country-specific year time trend is included in all specifications (coefficient estimates not reported).', 16 '* Significantly different from zero at 90 percent confidence.', 17 '** Significantly different from zero at 95 percent confidence.', 18 '* Significantly different from zero at 99 percent confidence.'], 19 title='Rainfall and Economic Growth', 20 stars={.1:'',.05:'',.01:'**'} 21 ) NameError: name 'rmse_ol1' is not defined
pip install statsmodelsr es
# Pregunta 3. Geopandas para la Mita
# Load the line geometry for the northern segment.
# The path is relative to the data directory set with os.chdir at the top
# of the notebook (same convention as the "../datos/..." Stata file), so it
# no longer depends on a hard-coded Windows user name.
prueba1 = gpd.read_file("../datos/Mita/huan_line.shp")
prueba1
# Plot of the northern segment: reproject in place to EPSG:4326
# (longitude/latitude) and draw it in black.
prueba1.to_crs(crs="EPSG:4326", inplace=True)
prueba1.plot(color="black")
<AxesSubplot: >
Image in a Jupyter notebook
# Load the geometry for the central area (the boundary shapefile).
# The path is relative to the data directory set with os.chdir at the top
# of the notebook (same convention as the "../datos/..." Stata file), so it
# no longer depends on a hard-coded Windows user name.
prueba2 = gpd.read_file("../datos/Mita/MitaBoundary.shp")
prueba2
# Plot of the central area: reproject in place to EPSG:4326
# (longitude/latitude) and draw it in gray.
prueba2.to_crs(crs="EPSG:4326", inplace=True)
prueba2.plot(color="gray")
<AxesSubplot: >
Image in a Jupyter notebook
# Load the line geometry for the southern segment.
# The path is relative to the data directory set with os.chdir at the top
# of the notebook (same convention as the "../datos/..." Stata file), so it
# no longer depends on a hard-coded Windows user name.
prueba3 = gpd.read_file("../datos/Mita/pot_line.shp")
prueba3
# Plot of the southern segment: reproject in place to EPSG:4326
# (longitude/latitude) and draw it in black.
prueba3.to_crs(crs="EPSG:4326", inplace=True)
prueba3.plot(color="black")
<AxesSubplot: >
Image in a Jupyter notebook
# Figure dimensions.
f, ax = plt.subplots(figsize=(12, 12))

# Layers drawn on the shared axes, in order:
# (GeoDataFrame, fill colour, edge colour, legend label or None).
# prueba1 and prueba3 share the blue/black style; only prueba3 carries the
# "Mita Boundary" label, so the colour identifies both in the legend.
map_layers = [
    (prueba1, 'blue', 'black', None),
    (prueba2, 'yellow', 'gold', "Study Boundary"),
    (prueba3, 'blue', 'black', "Mita Boundary"),
]
for layer, fill_colour, edge_colour, legend_label in map_layers:
    plot_kwargs = dict(color=fill_colour, edgecolor=edge_colour, zorder=0.5, ax=ax)
    if legend_label is not None:
        plot_kwargs['label'] = legend_label
    layer['geometry'].plot(**plot_kwargs)

# Remove the tick values from both axes.
ax.set_xticks([])
ax.set_yticks([])

# Place names, positioned in figure coordinates found by trial and error.
place_names = [
    (0.21, 0.72, 'Huancavelica'),
    (0.76, 0.30, 'Potosí'),
    (0.62, 0.24, 'Uyuni Salt Flat'),
]
for x_pos, y_pos, place in place_names:
    f.text(x_pos, y_pos, place, color='black', size=11,
           bbox=dict(facecolor='none', edgecolor='none', pad=6.0))

# Background map. It does not look much like the original figure (which was
# built from altitudes and cannot be reproduced here), but it adds some
# colour to the plot.
cx.add_basemap(ax, crs="EPSG:4326", attribution=False)

# Legend built from the labels given to the study and Mita boundaries.
ax.legend(loc='upper left', title="", frameon=True,
          bbox_to_anchor=(0, 0.15), prop={'size': 12})

plt.savefig("mapa2.png")
Image in a Jupyter notebook