CoCalc -- Trabajo_final_grupo5

GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_final/grupo5/Trabajo_final_grupo5_python (1).ipynb
⁴⁶⁸¹ views

Kernel: Python 3 (ipykernel)

In [1]:

# import libraries 

import pandas as pd 
import numpy as np
import re 
from tqdm import tqdm  # controlar el tiempo en un loop
import os


# linear model library

import statsmodels.api as sm  # linear regression utiliza todas las columnas de base de datos 
import statsmodels.formula.api as smf  # linear regression usa uan formula
from sklearn import datasets, linear_model # models 
from sklearn.metrics import mean_squared_error, r2_score
from linearmodels.iv import IV2SLS # for IV regression

import warnings
warnings.filterwarnings('ignore') # eliminar warning messages 

from pandas import Series, DataFrame
import matplotlib.pyplot as plt 

import geopandas as gpd  # manejo de datos georefereciados
from geopandas import GeoSeries # series de datos georerenciados
from shapely.geometry import Point, LineString, Polygon, MultiLineString # objetos geométricos
from shapely.ops import nearest_points  # operaciones entre objetods geometricos
import contextily as cx  # Fondo Goole maps, fondo de mapa 
from pyproj import CRS, Geod # proyecciones a sistemas planares


import matplotlib.patches as mpatches
import haversine as hs # distancia de grat-cricle entre puntos
from  geopy import distance # distancia entre puntos 
from tqdm import tqdm # contador de tiempo en un loop
from matplotlib.lines import Line2D

import warnings
warnings.filterwarnings('ignore') # eliminar warning messages

# Export latex table 

from pystout import pystout

user = os.getlogin()   # Username
os.chdir(f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2/Trabajo_final/datos") # Set directorio

In [2]:

#Instalamos librerias que no tenemos 
#!pip install pystout

In [3]:

#Pregunta 1.1. Estadísticas de varaibles de interés
#Establecemos la base de datos que usaremos 
repdata = pd.read_stata(r"../datos/mss_repdata.dta",
                           convert_categoricals=False)
repdata

Out[3]:

In [4]:

#Nos quedamos con las variables de interés
table1 = repdata.loc[:,["NDVI_g", "tot_100",
               "trade_pGDP", "pop_den_rur",
               "land_crop", "va_agr", "va_ind_manf"]]

table1

Out[4]:

In [5]:

#Generamos las estadísticas descriptivas basicas
summary_table = table1.describe().loc[["count","mean","std"]]
summary_table

Out[5]:

In [6]:

#Generamos la transpuesta para que se asemeje a la tabla real
summary_table = table1.describe().loc[["count","mean","std"]].T
summary_table

Out[6]:

In [7]:

#Cambiamos los nombres de las variables de interés
table1.columns

new_names = ["Tasa de var. del indice de vegetacion",
                "Terminos de intercambio",
                "Exportaciones respecto al PBI",
                "Densidad poblacional rural",
                "Porcentaje de tierra cultivable en uso",
                "V. A. del sector agriculta respecto PBI",
                "V. A. del sector manufacturero respecto PBI"]

dict( zip( table1.columns, new_names) )

Out[7]:

{'NDVI_g': 'Tasa de var. del indice de vegetacion',
 'tot_100': 'Terminos de intercambio',
 'trade_pGDP': 'Exportaciones respecto al PBI',
 'pop_den_rur': 'Densidad poblacional rural',
 'land_crop': 'Porcentaje de tierra cultivable en uso',
 'va_agr': 'V. A. del sector agriculta respecto PBI',
 'va_ind_manf': 'V. A. del sector manufacturero respecto PBI'}

In [8]:

# Cambiamos los nombres de las estadísticas a su versión en español
# Customize summary table 

index_nuevos_nombres = dict( zip( table1.columns, new_names) )

columns_nuevos_nombres = {
    "count": "Observaciones",
    "mean": "Media",
    "std": "Desviación estándar",
    
}

# Rename rows (indexes) and columns
summary_table.rename(index=index_nuevos_nombres, columns=columns_nuevos_nombres, inplace=True)

In [9]:

#Nos quedamos con la tabla final del inciso 1
summary_table

Out[9]:

In [10]:

# Exportamos el DataFrame a LaTeX como tabla. El codigo para modificar decimales no corrió por un motivo desconocido. 
summary_table.to_latex(
    "summary2.tex",
caption="Descriptive Statistics",
    column_format = "lccc"   # l: left, c:center , 
)

In [11]:

#Pregunta 1.2. Réplica de la tabla  3 (pag 737)

In [12]:

#Modelo básico
y = repdata['any_prio']

# add constant

X = sm.add_constant(repdata.loc[:,["GPCP_g", "GPCP_g_l"]])
X

Out[12]:

In [13]:

# sm function

ols_model1 = sm.OLS(y, X).fit()

# fit() permite correr la

print(ols_model1.summary())

Out[13]:

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               any_prio   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     1.008
Date:                Sun, 11 Dec 2022   Prob (F-statistic):              0.366
Time:                        19:47:42   Log-Likelihood:                -448.04
No. Observations:                 743   AIC:                             902.1
Df Residuals:                     740   BIC:                             915.9
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2697      0.016     16.449      0.000       0.238       0.302
GPCP_g        -0.0288      0.085     -0.339      0.735      -0.196       0.138
GPCP_g_l      -0.1204      0.086     -1.397      0.163      -0.290       0.049
==============================================================================
Omnibus:                      189.379   Durbin-Watson:                   0.530
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              159.939
Skew:                           1.044   Prob(JB):                     1.86e-35
Kurtosis:                       2.104   Cond. No.                         6.26
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [14]:

# Robust standar error

ols_model1_rb = sm.OLS(y, X).fit(cov_type = "HC1")
print(ols_model1_rb.summary())

Out[14]:

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               any_prio   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     1.014
Date:                Sun, 11 Dec 2022   Prob (F-statistic):              0.363
Time:                        19:47:43   Log-Likelihood:                -448.04
No. Observations:                 743   AIC:                             902.1
Df Residuals:                     740   BIC:                             915.9
Df Model:                           2                                         
Covariance Type:                  HC1                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2697      0.016     16.374      0.000       0.237       0.302
GPCP_g        -0.0288      0.090     -0.321      0.748      -0.205       0.147
GPCP_g_l      -0.1204      0.087     -1.391      0.164      -0.290       0.049
==============================================================================
Omnibus:                      189.379   Durbin-Watson:                   0.530
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              159.939
Skew:                           1.044   Prob(JB):                     1.86e-35
Kurtosis:                       2.104   Cond. No.                         6.26
==============================================================================

Notes:
[1] Standard Errors are heteroscedasticity robust (HC1)

In [15]:


#alternative robust standar error
ols_model1_rb1 = sm.OLS(y, X).fit(cov_type = "HC1") # Huber-White robust se

print(ols_model1_rb1.summary())

Out[15]:

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               any_prio   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     1.014
Date:                Sun, 11 Dec 2022   Prob (F-statistic):              0.363
Time:                        19:47:44   Log-Likelihood:                -448.04
No. Observations:                 743   AIC:                             902.1
Df Residuals:                     740   BIC:                             915.9
Df Model:                           2                                         
Covariance Type:                  HC1                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2697      0.016     16.374      0.000       0.237       0.302
GPCP_g        -0.0288      0.090     -0.321      0.748      -0.205       0.147
GPCP_g_l      -0.1204      0.087     -1.391      0.164      -0.290       0.049
==============================================================================
Omnibus:                      189.379   Durbin-Watson:                   0.530
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              159.939
Skew:                           1.044   Prob(JB):                     1.86e-35
Kurtosis:                       2.104   Cond. No.                         6.26
==============================================================================

Notes:
[1] Standard Errors are heteroscedasticity robust (HC1)

In [16]:


# Acceder a la información de la tabla

ols_model1_rb.summary2()

ols_model1_rb.summary2().tables[1]

Out[16]:

In [17]:


dir(sm.OLS(y, X))

# Lista de atributos y métodos

Out[17]:

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_check_kwargs',
 '_data_attr',
 '_df_model',
 '_df_resid',
 '_fit_collinear',
 '_fit_ridge',
 '_fit_zeros',
 '_formula_max_endog',
 '_get_init_kwds',
 '_handle_data',
 '_init_keys',
 '_kwargs_allowed',
 '_setup_score_hess',
 '_sqrt_lasso',
 'data',
 'df_model',
 'df_resid',
 'endog',
 'endog_names',
 'exog',
 'exog_names',
 'fit',
 'fit_regularized',
 'from_formula',
 'get_distribution',
 'hessian',
 'hessian_factor',
 'information',
 'initialize',
 'k_constant',
 'loglike',
 'nobs',
 'predict',
 'rank',
 'score',
 'weights',
 'wendog',
 'wexog',
 'whiten']

In [18]:


# Y estimados a partir del método predict 

sm.OLS(y, X).fit().predict()

Out[18]:

array([0.28496978, 0.2748217 , 0.25195793, 0.27214113, 0.26302426,
       0.26568972, 0.2466797 , 0.27791809, 0.25526007, 0.28224195,
       0.26917588, 0.30081865, 0.28540232, 0.21866934, 0.2861791 ,
       0.27253843, 0.27999266, 0.27934855, 0.26917232, 0.28043463,
       0.27237109, 0.26828123, 0.27929381, 0.25381756, 0.26769429,
       0.27252959, 0.27744108, 0.24540483, 0.2622242 , 0.28374367,
       0.22578955, 0.28753694, 0.26812988, 0.25210689, 0.2885355 ,
       0.26993623, 0.26967236, 0.25559903, 0.24386254, 0.2739153 ,
       0.30170927, 0.26866805, 0.28991521, 0.24865613, 0.26155857,
       0.27105444, 0.17496034, 0.29626138, 0.29942372, 0.23988976,
       0.30644488, 0.18201347, 0.27697215, 0.25379256, 0.24793872,
       0.26968144, 0.29971203, 0.27894354, 0.27432929, 0.27281134,
       0.28310697, 0.25005254, 0.24888163, 0.27129552, 0.27677362,
       0.2524269 , 0.27155199, 0.28413026, 0.2312682 , 0.28199823,
       0.27089332, 0.2498764 , 0.28802241, 0.26710212, 0.26587571,
       0.26061178, 0.2816283 , 0.25395249, 0.24080911, 0.29886544,
       0.26830424, 0.24838017, 0.28130712, 0.26336496, 0.25129281,
       0.27310025, 0.28621181, 0.27746349, 0.28334718, 0.287771  ,
       0.26165149, 0.26820117, 0.26669159, 0.26500867, 0.28311481,
       0.26802954, 0.27617081, 0.26463664, 0.2899249 , 0.25148575,
       0.26197403, 0.2830811 , 0.27255562, 0.25724725, 0.27454112,
       0.25363973, 0.27860369, 0.26622255, 0.26348373, 0.27386322,
       0.2701804 , 0.26925542, 0.27306353, 0.27548579, 0.2667854 ,
       0.26456781, 0.26831091, 0.28676397, 0.27466626, 0.24652724,
       0.27373615, 0.26641863, 0.25932942, 0.26635622, 0.26933033,
       0.27183976, 0.27969916, 0.27150216, 0.26157324, 0.27072618,
       0.27180415, 0.27647364, 0.25294516, 0.24895213, 0.2898935 ,
       0.28356814, 0.28855814, 0.28652282, 0.23272215, 0.28414438,
       0.26690943, 0.21789304, 0.30029623, 0.29493282, 0.21974803,
       0.26397938, 0.27688917, 0.19916378, 0.29395434, 0.27651376,
       0.27886489, 0.24405499, 0.23491972, 0.26913932, 0.2614215 ,
       0.28823138, 0.23593892, 0.26834563, 0.28889719, 0.26150323,
       0.25476589, 0.28021396, 0.2785566 , 0.28739284, 0.25638399,
       0.2685143 , 0.26601195, 0.266387  , 0.28047375, 0.2795782 ,
       0.24956602, 0.27618855, 0.21320236, 0.28918143, 0.28556657,
       0.29926377, 0.23754775, 0.27906998, 0.26678421, 0.24210984,
       0.26576587, 0.31459122, 0.27405136, 0.24264627, 0.277868  ,
       0.29356047, 0.28646617, 0.23867612, 0.27209826, 0.25694241,
       0.2604217 , 0.27243465, 0.28738483, 0.27930561, 0.25147996,
       0.24456337, 0.28121169, 0.26633678, 0.23019748, 0.29172668,
       0.28749903, 0.24031677, 0.27051027, 0.26277372, 0.28804177,
       0.2257001 , 0.27398469, 0.27986473, 0.27040816, 0.25399174,
       0.28285273, 0.28265373, 0.28293827, 0.25610697, 0.26501434,
       0.27097668, 0.26839272, 0.27354068, 0.27955355, 0.24731268,
       0.28533431, 0.24160057, 0.28978968, 0.29282641, 0.24800285,
       0.24949416, 0.23826094, 0.26310096, 0.25438468, 0.28716589,
       0.29769171, 0.26526959, 0.26059674, 0.25591369, 0.25387336,
       0.29156528, 0.26901609, 0.27783372, 0.26318861, 0.28377692,
       0.28088419, 0.28861658, 0.2754023 , 0.21938379, 0.26384442,
       0.27896696, 0.25244708, 0.28188856, 0.25745628, 0.28301216,
       0.24279603, 0.30309932, 0.26144509, 0.27874158, 0.25212564,
       0.27821526, 0.27051771, 0.2818894 , 0.27417885, 0.25867843,
       0.29059362, 0.29452066, 0.26426166, 0.25535766, 0.24858263,
       0.26895224, 0.26055379, 0.26625866, 0.27383444, 0.26121317,
       0.27357787, 0.27429979, 0.25721523, 0.29231413, 0.27848952,
       0.27543343, 0.26381051, 0.27067454, 0.24849388, 0.27312393,
       0.29277768, 0.25732122, 0.26590736, 0.25722482, 0.24220079,
       0.25835867, 0.27736391, 0.29110722, 0.25071988, 0.27249154,
       0.24667352, 0.28453534, 0.26808795, 0.26916287, 0.27444919,
       0.27196229, 0.28122464, 0.2752372 , 0.2750082 , 0.28815601,
       0.22531638, 0.27212014, 0.27465749, 0.26100753, 0.27865461,
       0.260037  , 0.28519386, 0.26132007, 0.2827029 , 0.2640916 ,
       0.26392098, 0.26866438, 0.28346345, 0.27598611, 0.26640891,
       0.29957058, 0.22487442, 0.24185624, 0.3151507 , 0.26392006,
       0.25288431, 0.2861609 , 0.28180334, 0.24162488, 0.26748908,
       0.2813641 , 0.29542743, 0.26492747, 0.27644114, 0.24360548,
       0.28417196, 0.26034035, 0.17026035, 0.30999498, 0.27696997,
       0.23869529, 0.29867055, 0.25692226, 0.27413664, 0.25267573,
       0.25573401, 0.24826106, 0.26042945, 0.29312151, 0.29211235,
       0.25152075, 0.29296429, 0.22099036, 0.28299408, 0.2356418 ,
       0.24959036, 0.28636198, 0.27331616, 0.28060294, 0.26493497,
       0.28194248, 0.29095738, 0.24315764, 0.27541201, 0.25938504,
       0.26466378, 0.26737449, 0.26595197, 0.29768923, 0.26383165,
       0.25954319, 0.25987779, 0.28399343, 0.26380928, 0.28600594,
       0.25770208, 0.28576176, 0.25631652, 0.254864  , 0.29963553,
       0.25451147, 0.27546931, 0.2578158 , 0.26132511, 0.26856981,
       0.27780048, 0.27176283, 0.27174748, 0.27706396, 0.29420754,
       0.23583807, 0.28863299, 0.24272018, 0.26333879, 0.28775854,
       0.29702876, 0.22687502, 0.24580235, 0.30930804, 0.25007445,
       0.30066286, 0.22727061, 0.29096045, 0.25035398, 0.25630239,
       0.26997338, 0.28689748, 0.26815232, 0.28173516, 0.29287411,
       0.27978217, 0.26405809, 0.24444535, 0.26822766, 0.27884218,
       0.22996273, 0.28585741, 0.28870403, 0.24169328, 0.27014081,
       0.26660939, 0.22146699, 0.30172142, 0.27983771, 0.26684136,
       0.23897902, 0.27601925, 0.26925608, 0.3101956 , 0.27663425,
       0.25205277, 0.2547879 , 0.27424848, 0.28229888, 0.223476  ,
       0.2662009 , 0.32331318, 0.26671921, 0.2668772 , 0.2074825 ,
       0.22536602, 0.28593027, 0.30477405, 0.26808999, 0.24947935,
       0.25745349, 0.27321761, 0.2748773 , 0.27776485, 0.23928001,
       0.2635465 , 0.28749362, 0.29472081, 0.21862439, 0.2676335 ,
       0.30080166, 0.26966232, 0.29306065, 0.23688644, 0.28930969,
       0.24584524, 0.24023323, 0.26620497, 0.28865302, 0.25440343,
       0.27250326, 0.28728198, 0.07583003, 0.31090373, 0.29627702,
       0.236097  , 0.21528979, 0.31659791, 0.26719918, 0.29998336,
       0.27639208, 0.29233801, 0.27626678, 0.23871984, 0.29270098,
       0.28971898, 0.19109602, 0.29560864, 0.29507539, 0.20492604,
       0.2788876 , 0.25891261, 0.17444965, 0.31339199, 0.27884491,
       0.26400569, 0.22305778, 0.27565457, 0.28403302, 0.27106399,
       0.28610541, 0.25836077, 0.25912874, 0.27306704, 0.27233956,
       0.24449237, 0.26971896, 0.27261432, 0.26493679, 0.27394499,
       0.26721449, 0.25684216, 0.27022947, 0.27542375, 0.26995902,
       0.27579783, 0.28002371, 0.25709103, 0.24234488, 0.296826  ,
       0.26192363, 0.25365127, 0.2864295 , 0.24891512, 0.24938813,
       0.28482997, 0.27828059, 0.28726476, 0.28035497, 0.27157612,
       0.25801002, 0.26678263, 0.26853841, 0.27516955, 0.27874475,
       0.28386285, 0.24604537, 0.28752855, 0.29526714, 0.24616023,
       0.2486039 , 0.24670796, 0.26244128, 0.25179474, 0.28499766,
       0.3037    , 0.26479621, 0.26168092, 0.25250814, 0.25671517,
       0.28608606, 0.27361334, 0.27955091, 0.25924962, 0.28006212,
       0.2549075 , 0.28531706, 0.2908116 , 0.26898266, 0.25820194,
       0.24600473, 0.27005837, 0.27632638, 0.26294027, 0.27953282,
       0.26117204, 0.26488158, 0.27616653, 0.27326821, 0.28086191,
       0.28593079, 0.28560771, 0.26892224, 0.28822231, 0.16313024,
       0.25497695, 0.32293383, 0.21249017, 0.27262115, 0.30609214,
       0.16679669, 0.27783477, 0.23734724, 0.29606521, 0.26716866,
       0.2454338 , 0.30372741, 0.27167846, 0.26917541, 0.24335986,
       0.28579254, 0.25898906, 0.24126848, 0.29032779, 0.29051066,
       0.24820739, 0.29564777, 0.22738325, 0.27717002, 0.22919359,
       0.25948569, 0.28919155, 0.28430183, 0.26479902, 0.27528442,
       0.2899312 , 0.28356204, 0.28107796, 0.21581487, 0.27031131,
       0.25826531, 0.24656602, 0.28470031, 0.29621292, 0.25227005,
       0.26035504, 0.26259964, 0.26482224, 0.26159492, 0.25209917,
       0.3018435 , 0.25325603, 0.25525354, 0.30793087, 0.21737779,
       0.2329495 , 0.31341325, 0.28866529, 0.23647127, 0.27112947,
       0.27523367, 0.29539826, 0.28145113, 0.28730419, 0.18898682,
       0.28578298, 0.22418157, 0.25214535, 0.28336495, 0.28970836,
       0.29180466, 0.27580531, 0.2309655 , 0.28955376, 0.25599223,
       0.27000073, 0.28290819, 0.28032227, 0.25336004, 0.25340982,
       0.29365966, 0.27637294, 0.28032937, 0.28603889, 0.26034879,
       0.26022068, 0.27697228, 0.24887283, 0.28978698, 0.28490459,
       0.28200161, 0.28765797, 0.26949008, 0.22787498, 0.27018998,
       0.28271041, 0.25357493, 0.26073737, 0.27463583, 0.2798359 ,
       0.24296043, 0.30306325, 0.25136765, 0.26761196, 0.2565544 ,
       0.28044399, 0.26945989, 0.28381678, 0.27537043, 0.24773015,
       0.24674011, 0.30349319, 0.27601743, 0.22736574, 0.28421549,
       0.25956987, 0.23834513, 0.2883796 , 0.26600035, 0.27435681,
       0.28586025, 0.28055657, 0.25998459, 0.27036974, 0.25652407,
       0.27700439, 0.28883738, 0.26650265, 0.26988379, 0.2485796 ,
       0.29300512, 0.26148703, 0.25306436, 0.27810453, 0.25796663,
       0.25084136, 0.26911237, 0.28340842, 0.2867332 , 0.28721706,
       0.26366116, 0.27082614, 0.26701527, 0.27578898, 0.28386679,
       0.28645862, 0.27412183, 0.26185508, 0.28062735, 0.2602632 ,
       0.26023209, 0.26803586, 0.29506981, 0.24185199, 0.2570696 ,
       0.29029694, 0.27443058, 0.29234101, 0.22388564, 0.30235436,
       0.26654653, 0.25558241, 0.27048435, 0.28396046, 0.25304765,
       0.25309798, 0.31575402, 0.26484374, 0.24554492, 0.22660271,
       0.28882671, 0.29258781, 0.22467728, 0.28253382, 0.28212012,
       0.29394026, 0.27481921, 0.21302369, 0.30612175, 0.24754037,
       0.20656903, 0.26960102, 0.28885259])

In [19]:


# Recordad métodos y atributos 

print(dir(ols_model1))

# predict para ello uso la función predict 

ols_model1.predict(X)

# acceso a los parámetros

ols_model1.params

# R2 y R2  ajustado

ols_model1.rsquared
ols_model1.rsquared_adj

Out[19]:

['HC0_se', 'HC1_se', 'HC2_se', 'HC3_se', '_HCCM', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abat_diagonal', '_cache', '_data_attr', '_data_in_cache', '_get_robustcov_results', '_is_nested', '_use_t', '_wexog_singular_values', 'aic', 'bic', 'bse', 'centered_tss', 'compare_f_test', 'compare_lm_test', 'compare_lr_test', 'condition_number', 'conf_int', 'conf_int_el', 'cov_HC0', 'cov_HC1', 'cov_HC2', 'cov_HC3', 'cov_kwds', 'cov_params', 'cov_type', 'df_model', 'df_resid', 'diagn', 'eigenvals', 'el_test', 'ess', 'f_pvalue', 'f_test', 'fittedvalues', 'fvalue', 'get_influence', 'get_prediction', 'get_robustcov_results', 'info_criteria', 'initialize', 'k_constant', 'llf', 'load', 'model', 'mse_model', 'mse_resid', 'mse_total', 'nobs', 'normalized_cov_params', 'outlier_test', 'params', 'predict', 'pvalues', 'remove_data', 'resid', 'resid_pearson', 'rsquared', 'rsquared_adj', 'save', 'scale', 'ssr', 'summary', 'summary2', 't_test', 't_test_pairwise', 'tvalues', 'uncentered_tss', 'use_t', 'wald_test', 'wald_test_terms', 'wresid']

2.0778261906828632e-05

In [20]:


control_formula = "any_prio"+ " ~ "+ "GPCP_g + " + "GPCP_g_l"

ols_model1 = smf.ols(control_formula, data=repdata).fit()

print(ols_model1.summary())

Out[20]:

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               any_prio   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     1.008
Date:                Sun, 11 Dec 2022   Prob (F-statistic):              0.366
Time:                        19:47:51   Log-Likelihood:                -448.04
No. Observations:                 743   AIC:                             902.1
Df Residuals:                     740   BIC:                             915.9
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.2697      0.016     16.449      0.000       0.238       0.302
GPCP_g        -0.0288      0.085     -0.339      0.735      -0.196       0.138
GPCP_g_l      -0.1204      0.086     -1.397      0.163      -0.290       0.049
==============================================================================
Omnibus:                      189.379   Durbin-Watson:                   0.530
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              159.939
Skew:                           1.044   Prob(JB):                     1.86e-35
Kurtosis:                       2.104   Cond. No.                         6.26
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [21]:


ols_model1_skl = linear_model.LinearRegression().fit( X, y )

ols_model1_skl.coef_ # acceso a coeficientes 

ols_model1_skl.predict(X) # predict en formato array 
ols_model1_skl.score(X,y) # R cuadrado
dir(ols_model1_skl)
mean_squared_error( y, ols_model1.predict())**0.5 # root mean square error

Out[21]:

0.4422283305322858

In [22]:


# country fixed effect

index_columns = np.where( repdata.columns.str.contains(
    '_time$'))[0]
# indice con nombre de variables que terminan con _time

country_trend = repdata.columns[index_columns] # se extrae el nombre de todas las variables que terminan con _time

In [23]:

# In[88]:


formula_model1 = "any_prio ~ GPCP_g + GPCP_g_l + C(ccode)" + ' + ' + ' + '.join( country_trend )

ols_model1 = smf.ols(formula_model1, data=repdata).fit(cov_type='cluster', cov_kwds={'groups': repdata['ccode']})

print(ols_model1.summary())

rmse_ol1 = round(mean_squared_error( y, ols_model1.predict())**0.5,2)

print(rmse_ol1)

Out[23]:

---------------------------------------------------------------------------
PatsyError                                Traceback (most recent call last)
Cell In [23], line 6
      1 # In[88]:
      4 formula_model1 = "any_prio ~ GPCP_g + GPCP_g_l + C(ccode)" + ' + ' + ' + '.join( country_trend )
----> 6 ols_model1 = smf.ols(formula_model1, data=repdata).fit(cov_type='cluster', cov_kwds={'groups': repdata['ccode']})
      8 print(ols_model1.summary())
     10 rmse_ol1 = round(mean_squared_error( y, ols_model1.predict())**0.5,2)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\statsmodels\base\model.py:200, in Model.from_formula(cls, formula, data, subset, drop_cols, *args, **kwargs)
    197 if missing == 'none':  # with patsy it's drop or raise. let's raise.
    198     missing = 'raise'
--> 200 tmp = handle_formula_data(data, None, formula, depth=eval_env,
    201                           missing=missing)
    202 ((endog, exog), missing_idx, design_info) = tmp
    203 max_endog = cls._formula_max_endog
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\statsmodels\formula\formulatools.py:63, in handle_formula_data(Y, X, formula, depth, missing)
     61 else:
     62     if data_util._is_using_pandas(Y, None):
---> 63         result = dmatrices(formula, Y, depth, return_type='dataframe',
     64                            NA_action=na_action)
     65     else:
     66         result = dmatrices(formula, Y, depth, return_type='dataframe',
     67                            NA_action=na_action)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:309, in dmatrices(formula_like, data, eval_env, NA_action, return_type)
    299 """Construct two design matrices given a formula_like and data.
    300 
    301 This function is identical to :func:`dmatrix`, except that it requires
   (...)
    306 See :func:`dmatrix` for details.
    307 """
    308 eval_env = EvalEnvironment.capture(eval_env, reference=1)
--> 309 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
    310                                   NA_action, return_type)
    311 if lhs.shape[1] == 0:
    312     raise PatsyError("model is missing required outcome variables")
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:164, in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
    162 def data_iter_maker():
    163     return iter([data])
--> 164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
    165                                   NA_action)
    166 if design_infos is not None:
    167     return build_design_matrices(design_infos, data,
    168                                  NA_action=NA_action,
    169                                  return_type=return_type)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:62, in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
     55         raise PatsyError(
     56             "On Python 2, formula strings must be either 'str' objects, "
     57             "or else 'unicode' objects containing only ascii "
     58             "characters. You passed a unicode string with non-ascii "
     59             "characters. I'm afraid you'll have to either switch to "
     60             "ascii-only, or else upgrade to Python 3.")
     61 if isinstance(formula_like, str):
---> 62     formula_like = ModelDesc.from_formula(formula_like)
     63     # fallthrough
     64 if isinstance(formula_like, ModelDesc):
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\desc.py:164, in ModelDesc.from_formula(cls, tree_or_string)
    162     tree = tree_or_string
    163 else:
--> 164     tree = parse_formula(tree_or_string)
    165 value = Evaluator().eval(tree, require_evalexpr=False)
    166 assert isinstance(value, cls)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\parse_formula.py:146, in parse_formula(code, extra_operators)
    144 operators = _default_ops + extra_operators
    145 operator_strings = [op.token_type for op in operators]
--> 146 tree = infix_parse(_tokenize_formula(code, operator_strings),
    147                    operators,
    148                    _atomic_token_types)
    149 if not isinstance(tree, ParseNode) or tree.type != "~":
    150     tree = ParseNode("~", None, [tree], tree.origin)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\infix_parser.py:221, in infix_parse(tokens, operators, atomic_types, trace)
    218     print("End of token stream")
    220 if want_noun:
--> 221     raise PatsyError("expected a noun, but instead the expression ended",
    222                         c.op_stack[-1].token.origin)
    224 while c.op_stack:
    225     if c.op_stack[-1].op.token_type == Token.LPAREN:
PatsyError: expected a noun, but instead the expression ended
    any_prio ~ GPCP_g + GPCP_g_l + C(ccode) +
                                            ^

In [50]:

#Modelo 2
y = repdata['war_prio']

# add constant

X = sm.add_constant(repdata.loc[:,["GPCP_g", "GPCP_g_l"]])

# sm function

ols_model2 = sm.OLS(y, X).fit()

# fit() permite correr la regresión

print(ols_model2.summary())

# Robust standar error

ols_model2_rb = sm.OLS(y, X).fit(cov_type = "HC1")
print(ols_model2_rb.summary())

Out[50]:

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               war_prio   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     1.200
Date:                Sun, 11 Dec 2022   Prob (F-statistic):              0.302
Time:                        19:44:08   Log-Likelihood:                -320.10
No. Observations:                 743   AIC:                             646.2
Df Residuals:                     740   BIC:                             660.0
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1697      0.014     12.292      0.000       0.143       0.197
GPCP_g        -0.0977      0.072     -1.363      0.173      -0.238       0.043
GPCP_g_l      -0.0891      0.073     -1.228      0.220      -0.232       0.053
==============================================================================
Omnibus:                      216.896   Durbin-Watson:                   0.482
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              434.329
Skew:                           1.777   Prob(JB):                     4.86e-95
Kurtosis:                       4.181   Cond. No.                         6.26
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:               war_prio   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     1.507
Date:                Sun, 11 Dec 2022   Prob (F-statistic):              0.222
Time:                        19:44:08   Log-Likelihood:                -320.10
No. Observations:                 743   AIC:                             646.2
Df Residuals:                     740   BIC:                             660.0
Df Model:                           2                                         
Covariance Type:                  HC1                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1697      0.014     12.142      0.000       0.142       0.197
GPCP_g        -0.0977      0.066     -1.474      0.140      -0.228       0.032
GPCP_g_l      -0.0891      0.063     -1.412      0.158      -0.213       0.035
==============================================================================
Omnibus:                      216.896   Durbin-Watson:                   0.482
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              434.329
Skew:                           1.777   Prob(JB):                     4.86e-95
Kurtosis:                       4.181   Cond. No.                         6.26
==============================================================================

Notes:
[1] Standard Errors are heteroscedasticity robust (HC1)

In [51]:

#alternative robust standar error
ols_model_rb2 = sm.OLS(y, X).fit(cov_type = "HC1") # Huber-White robust se

print(ols_model_rb2.summary())

Out[51]:

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               war_prio   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     1.507
Date:                Sun, 11 Dec 2022   Prob (F-statistic):              0.222
Time:                        19:44:13   Log-Likelihood:                -320.10
No. Observations:                 743   AIC:                             646.2
Df Residuals:                     740   BIC:                             660.0
Df Model:                           2                                         
Covariance Type:                  HC1                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1697      0.014     12.142      0.000       0.142       0.197
GPCP_g        -0.0977      0.066     -1.474      0.140      -0.228       0.032
GPCP_g_l      -0.0891      0.063     -1.412      0.158      -0.213       0.035
==============================================================================
Omnibus:                      216.896   Durbin-Watson:                   0.482
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              434.329
Skew:                           1.777   Prob(JB):                     4.86e-95
Kurtosis:                       4.181   Cond. No.                         6.26
==============================================================================

Notes:
[1] Standard Errors are heteroscedasticity robust (HC1)

In [52]:

# Acceder a la información de la tabla

ols_model_rb2.summary2()

ols_model_rb2.summary2().tables[1]

Out[52]:

In [53]:


dir(sm.OLS(y, X))

# Lista de atributos y métodos

Out[53]:

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_check_kwargs',
 '_data_attr',
 '_df_model',
 '_df_resid',
 '_fit_collinear',
 '_fit_ridge',
 '_fit_zeros',
 '_formula_max_endog',
 '_get_init_kwds',
 '_handle_data',
 '_init_keys',
 '_kwargs_allowed',
 '_setup_score_hess',
 '_sqrt_lasso',
 'data',
 'df_model',
 'df_resid',
 'endog',
 'endog_names',
 'exog',
 'exog_names',
 'fit',
 'fit_regularized',
 'from_formula',
 'get_distribution',
 'hessian',
 'hessian_factor',
 'information',
 'initialize',
 'k_constant',
 'loglike',
 'nobs',
 'predict',
 'rank',
 'score',
 'weights',
 'wendog',
 'wexog',
 'whiten']

In [54]:


# Y estimados a partir del método predict 

sm.OLS(y, X).fit().predict()

Out[54]:

array([0.18704291, 0.16156761, 0.1591636 , 0.16689364, 0.16611632,
       0.15020309, 0.16052623, 0.16445212, 0.1678748 , 0.17493067,
       0.18461317, 0.21096717, 0.14646171, 0.14220873, 0.18236471,
       0.17711868, 0.18211489, 0.18210699, 0.14566715, 0.17998848,
       0.16871667, 0.17702256, 0.16698367, 0.15666318, 0.1680248 ,
       0.17984615, 0.16204504, 0.1430644 , 0.1802974 , 0.14964679,
       0.14789666, 0.18521695, 0.15446167, 0.16866698, 0.18319467,
       0.17212033, 0.15998443, 0.16219266, 0.14809908, 0.19410448,
       0.18898141, 0.18438161, 0.1734193 , 0.14512834, 0.17947203,
       0.1080412 , 0.11027748, 0.21478754, 0.16391527, 0.1845413 ,
       0.13976727, 0.11104919, 0.16801464, 0.14509102, 0.14936252,
       0.18708431, 0.19845518, 0.17957136, 0.17248373, 0.18268706,
       0.1702437 , 0.14207546, 0.15359244, 0.1777945 , 0.16454007,
       0.15436827, 0.186385  , 0.15436469, 0.14794209, 0.18320195,
       0.15511713, 0.166916  , 0.18187508, 0.16644452, 0.16207225,
       0.15871311, 0.17390356, 0.13542463, 0.16609041, 0.1940256 ,
       0.15327744, 0.16152694, 0.17701248, 0.15333377, 0.15588651,
       0.18178009, 0.18544734, 0.18102702, 0.19238042, 0.17812424,
       0.16287117, 0.16802763, 0.16162038, 0.17802261, 0.16563186,
       0.17415717, 0.16759646, 0.18110427, 0.17467989, 0.14947877,
       0.1714517 , 0.18359009, 0.16247644, 0.16628072, 0.16164332,
       0.1636782 , 0.17512774, 0.16253175, 0.16759689, 0.17313232,
       0.16962104, 0.16977008, 0.17919028, 0.15981144, 0.16498076,
       0.16277749, 0.17780929, 0.18909981, 0.15799448, 0.15521208,
       0.17201592, 0.16114667, 0.15991444, 0.16695022, 0.16935811,
       0.17699674, 0.17946938, 0.16566712, 0.16434882, 0.17016698,
       0.17781272, 0.16502038, 0.15318973, 0.16544964, 0.19148407,
       0.18787251, 0.20031107, 0.15687086, 0.14979529, 0.18722649,
       0.13143828, 0.14516687, 0.21543774, 0.15845881, 0.12520563,
       0.18152704, 0.12671072, 0.13232035, 0.18971026, 0.18393917,
       0.16208728, 0.14271711, 0.14576751, 0.16000941, 0.18016869,
       0.16290845, 0.14075029, 0.18143644, 0.18127924, 0.1526682 ,
       0.1646786 , 0.1798863 , 0.18942011, 0.17439827, 0.15942248,
       0.1672099 , 0.16370274, 0.17189004, 0.18657038, 0.16575548,
       0.14826875, 0.13594103, 0.13915283, 0.18840088, 0.20532517,
       0.16983948, 0.15135654, 0.17843133, 0.1520727 , 0.14061413,
       0.19234703, 0.21477279, 0.15514228, 0.15207831, 0.18710926,
       0.20282314, 0.16165132, 0.14981917, 0.16472176, 0.15445135,
       0.16228788, 0.18100661, 0.19059567, 0.16938383, 0.13874985,
       0.15727079, 0.18267049, 0.13946622, 0.15154327, 0.19780696,
       0.18044045, 0.15052487, 0.1614658 , 0.18290449, 0.15501597,
       0.13844977, 0.17849413, 0.18040646, 0.15855663, 0.16498806,
       0.18514041, 0.18950848, 0.17160514, 0.15637973, 0.16720823,
       0.16970888, 0.16894229, 0.18159817, 0.16500872, 0.14367771,
       0.16136921, 0.15740701, 0.20200474, 0.17496123, 0.14547437,
       0.13512712, 0.14491251, 0.15345414, 0.16504363, 0.20073796,
       0.18862624, 0.1619266 , 0.15739639, 0.14600772, 0.17220029,
       0.18415935, 0.17434085, 0.17551435, 0.14825508, 0.18491573,
       0.18730939, 0.1945623 , 0.14339232, 0.12653114, 0.17440473,
       0.16309447, 0.16725978, 0.16760426, 0.17442337, 0.15696866,
       0.17266447, 0.18704612, 0.17229268, 0.16373636, 0.1627915 ,
       0.17283451, 0.18546656, 0.14741111, 0.16374303, 0.1708573 ,
       0.20133781, 0.18597484, 0.15978412, 0.14547983, 0.15475955,
       0.16405312, 0.15975333, 0.1710791 , 0.1670655 , 0.16449794,
       0.17809304, 0.1619682 , 0.17368958, 0.19080806, 0.18097444,
       0.16895817, 0.17033488, 0.15735606, 0.1521995 , 0.18861119,
       0.17922437, 0.15908425, 0.16268817, 0.144761  , 0.14191308,
       0.16210217, 0.19210516, 0.17207626, 0.16141921, 0.15482731,
       0.16221672, 0.17979104, 0.16770973, 0.17097229, 0.17857576,
       0.15472526, 0.18194385, 0.17270062, 0.1920322 , 0.15509649,
       0.13716635, 0.17635244, 0.16594548, 0.17102634, 0.16738002,
       0.17410421, 0.17367534, 0.17236854, 0.17645524, 0.16244065,
       0.16286941, 0.17662267, 0.18396776, 0.17370455, 0.16101548,
       0.16923238, 0.11190001, 0.17801593, 0.20261344, 0.15289799,
       0.16481666, 0.19364133, 0.16133588, 0.1467095 , 0.17121662,
       0.19580919, 0.18360753, 0.17473608, 0.15650069, 0.1570425 ,
       0.19069279, 0.09478098, 0.11662815, 0.22025171, 0.15045632,
       0.1672967 , 0.18178798, 0.16524338, 0.16360239, 0.15097496,
       0.14773611, 0.14540589, 0.17326466, 0.20525889, 0.16924749,
       0.17906121, 0.15295605, 0.14632107, 0.16156667, 0.12915442,
       0.1654211 , 0.18161083, 0.18346956, 0.1738644 , 0.16963028,
       0.1964992 , 0.16736664, 0.15501128, 0.16811332, 0.15880366,
       0.16602904, 0.1613465 , 0.1844751 , 0.19097083, 0.1609754 ,
       0.1533545 , 0.17307076, 0.17343374, 0.17810026, 0.17139475,
       0.17215015, 0.17654417, 0.14518327, 0.18024761, 0.180942  ,
       0.16359373, 0.167556  , 0.15599243, 0.16161041, 0.17353556,
       0.17738658, 0.16938193, 0.18410675, 0.19667618, 0.16255364,
       0.16037175, 0.1678773 , 0.14420727, 0.17099655, 0.20556127,
       0.16805829, 0.11572445, 0.18159303, 0.1800722 , 0.18210499,
       0.16185073, 0.15424136, 0.17488568, 0.14798629, 0.15497048,
       0.19050428, 0.14164697, 0.17298674, 0.19172098, 0.19318605,
       0.17714965, 0.15039347, 0.14711426, 0.18074995, 0.14971273,
       0.14660418, 0.1978318 , 0.16626958, 0.1476653 , 0.17641938,
       0.13227487, 0.15285324, 0.19920126, 0.1795625 , 0.14991386,
       0.13920319, 0.16828129, 0.19342648, 0.20616132, 0.16583065,
       0.14727692, 0.15805624, 0.18741266, 0.15220377, 0.12490523,
       0.20183799, 0.20601325, 0.17341799, 0.13514278, 0.09431298,
       0.14184593, 0.20374632, 0.1962324 , 0.16160206, 0.12980427,
       0.16257447, 0.17328315, 0.18290921, 0.15760305, 0.14187327,
       0.17081467, 0.20606595, 0.15731004, 0.12554287, 0.18902469,
       0.18770634, 0.19027183, 0.1625395 , 0.16043646, 0.17314178,
       0.13489106, 0.14129788, 0.18517464, 0.15821201, 0.15011158,
       0.21350717, 0.05465512, 0.0476425 , 0.21957367, 0.17853394,
       0.10087474, 0.16877245, 0.16398743, 0.18667498, 0.19340881,
       0.18671646, 0.19575813, 0.15283282, 0.15527513, 0.21186909,
       0.13221655, 0.12165224, 0.21494396, 0.14650931, 0.1253552 ,
       0.18558154, 0.0949594 , 0.12566243, 0.20696712, 0.17983861,
       0.13608168, 0.13428499, 0.1836093 , 0.17832032, 0.18238577,
       0.17625905, 0.15436404, 0.16265776, 0.1776055 , 0.15578372,
       0.1503598 , 0.17238686, 0.16816736, 0.16873778, 0.1731356 ,
       0.15981054, 0.15953081, 0.17398574, 0.17258451, 0.17598569,
       0.16465887, 0.17446053, 0.13876987, 0.16702164, 0.18797323,
       0.15064069, 0.17067385, 0.17244879, 0.13928198, 0.16346391,
       0.18397256, 0.18568486, 0.18871859, 0.1804116 , 0.16400864,
       0.15943395, 0.16632276, 0.17061527, 0.18057733, 0.17157088,
       0.1635388 , 0.1588517 , 0.20205154, 0.17604352, 0.14225616,
       0.13985979, 0.15100566, 0.15188   , 0.16080549, 0.20309118,
       0.19244664, 0.16293896, 0.15537597, 0.14626345, 0.17022956,
       0.18261636, 0.17942167, 0.17431904, 0.14521316, 0.1663386 ,
       0.16542766, 0.19448551, 0.18572249, 0.1653653 , 0.14639544,
       0.15099438, 0.17556209, 0.16852706, 0.1719489 , 0.17247756,
       0.15936575, 0.16994439, 0.17545043, 0.17752014, 0.18563946,
       0.19237074, 0.17882417, 0.1778929 , 0.12045619, 0.07140599,
       0.20074209, 0.17459829, 0.11976406, 0.21110232, 0.12880444,
       0.10420644, 0.15247391, 0.15697276, 0.21193017, 0.14725273,
       0.17320624, 0.19506753, 0.17539197, 0.15001762, 0.16083143,
       0.17959224, 0.1415976 , 0.15742811, 0.20271294, 0.16581939,
       0.17720555, 0.15955643, 0.1487012 , 0.15165633, 0.13070567,
       0.172528  , 0.19210238, 0.18560423, 0.16678886, 0.18539161,
       0.18971351, 0.19536676, 0.14363159, 0.13098983, 0.16664983,
       0.14535142, 0.15745466, 0.19991597, 0.17942568, 0.15175086,
       0.15889733, 0.16164316, 0.16464185, 0.1480495 , 0.17522556,
       0.20085826, 0.14102674, 0.18947149, 0.17171226, 0.10198957,
       0.16606492, 0.21912074, 0.16239543, 0.14603957, 0.17041403,
       0.18964808, 0.1902954 , 0.20268902, 0.12760449, 0.12630954,
       0.15566774, 0.12359586, 0.16186551, 0.1942139 , 0.17708252,
       0.19659289, 0.14613178, 0.15557385, 0.17597748, 0.15817872,
       0.17623212, 0.18785079, 0.17045474, 0.14378938, 0.17200213,
       0.19065048, 0.17862203, 0.18883183, 0.17775083, 0.15466109,
       0.17126419, 0.1582525 , 0.1692128 , 0.17515346, 0.18632771,
       0.18868285, 0.18903063, 0.14350925, 0.13653112, 0.18037886,
       0.1704386 , 0.15197   , 0.16331233, 0.18516054, 0.15446659,
       0.17385424, 0.1824608 , 0.15719475, 0.15794858, 0.1675651 ,
       0.17413515, 0.18333956, 0.15955835, 0.16448492, 0.13434514,
       0.1714557 , 0.20562819, 0.14524184, 0.14775105, 0.17944519,
       0.13930323, 0.1589058 , 0.18089702, 0.16790272, 0.18130923,
       0.19010938, 0.17093998, 0.16493286, 0.1616061 , 0.16070747,
       0.19104145, 0.1677181 , 0.17148525, 0.15273654, 0.1693474 ,
       0.18458853, 0.1514887 , 0.16379162, 0.1711832 , 0.1494963 ,
       0.15369807, 0.176016  , 0.18770166, 0.19434745, 0.17849891,
       0.16640218, 0.16838233, 0.16935242, 0.18324291, 0.17970368,
       0.18651227, 0.1660176 , 0.17194037, 0.17286607, 0.15804757,
       0.15688771, 0.18808021, 0.17347543, 0.13777878, 0.1739309 ,
       0.18251431, 0.19563886, 0.15242018, 0.1564431 , 0.19382495,
       0.15894914, 0.15679604, 0.18238104, 0.16721841, 0.13976083,
       0.18681281, 0.2026636 , 0.1576839 , 0.12280629, 0.14468015,
       0.20554201, 0.15639431, 0.14328271, 0.18410435, 0.19104338,
       0.2008204 , 0.1317299 , 0.15190923, 0.19194582, 0.11405117,
       0.11955249, 0.1834425 , 0.17662792])

In [55]:


# Recordad métodos y atributos 

print(dir(ols_model2))

# predict para ello uso la función predict 

ols_model2.predict(X)

# acceso a los parámetros

ols_model2.params

# R2 y R2  ajustado

ols_model2.rsquared
ols_model2.rsquared_adj

Out[55]:

['HC0_se', 'HC1_se', 'HC2_se', 'HC3_se', '_HCCM', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abat_diagonal', '_cache', '_data_attr', '_data_in_cache', '_get_robustcov_results', '_is_nested', '_use_t', '_wexog_singular_values', 'aic', 'bic', 'bse', 'centered_tss', 'compare_f_test', 'compare_lm_test', 'compare_lr_test', 'condition_number', 'conf_int', 'conf_int_el', 'cov_HC0', 'cov_HC1', 'cov_HC2', 'cov_HC3', 'cov_kwds', 'cov_params', 'cov_type', 'df_model', 'df_resid', 'diagn', 'eigenvals', 'el_test', 'ess', 'f_pvalue', 'f_test', 'fittedvalues', 'fvalue', 'get_influence', 'get_prediction', 'get_robustcov_results', 'info_criteria', 'initialize', 'k_constant', 'llf', 'load', 'model', 'mse_model', 'mse_resid', 'mse_total', 'nobs', 'normalized_cov_params', 'outlier_test', 'params', 'predict', 'pvalues', 'remove_data', 'resid', 'resid_pearson', 'rsquared', 'rsquared_adj', 'save', 'scale', 'ssr', 'summary', 'summary2', 't_test', 't_test_pairwise', 'tvalues', 'uncentered_tss', 'use_t', 'wald_test', 'wald_test_terms', 'wresid']

0.0005381642535065012

In [57]:


control_formula = "war_prio"+ " ~ "+ "GPCP_g + " + "GPCP_g_l"

ols_model2 = smf.ols(control_formula, data=repdata).fit()

print(ols_model2.summary())

Out[57]:

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               war_prio   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     1.200
Date:                Sun, 11 Dec 2022   Prob (F-statistic):              0.302
Time:                        19:44:32   Log-Likelihood:                -320.10
No. Observations:                 743   AIC:                             646.2
Df Residuals:                     740   BIC:                             660.0
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.1697      0.014     12.292      0.000       0.143       0.197
GPCP_g        -0.0977      0.072     -1.363      0.173      -0.238       0.043
GPCP_g_l      -0.0891      0.073     -1.228      0.220      -0.232       0.053
==============================================================================
Omnibus:                      216.896   Durbin-Watson:                   0.482
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              434.329
Skew:                           1.777   Prob(JB):                     4.86e-95
Kurtosis:                       4.181   Cond. No.                         6.26
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [58]:


ols_model2_skl = linear_model.LinearRegression().fit( X, y )

ols_model2_skl.coef_ # acceso a coeficientes 

ols_model2_skl.predict(X) # predict en formato array 
ols_model2_skl.score(X,y) # R cuadrado
dir(ols_model2_skl)
mean_squared_error( y, ols_model2.predict())**0.5 # root mean square error

Out[58]:

0.37227542395527624

In [60]:


formula_model2 = "war_prio ~ GPCP_g + GPCP_g_l + C(ccode)" + ' + ' + ' + '.join( country_trend )

ols_model2 = smf.ols(formula_model2, data=repdata).fit(cov_type='cluster', cov_kwds={'groups': repdata['ccode']})

print(ols_model2.summary())

rmse_ol2 = round(mean_squared_error( y, ols_model2.predict())**0.5,2)

print(rmse_ol2)

Out[60]:

---------------------------------------------------------------------------
PatsyError                                Traceback (most recent call last)
Cell In [60], line 6
      1 # In[135]:
      4 formula_model2 = "war_prio ~ GPCP_g + GPCP_g_l + C(ccode)" + ' + ' + ' + '.join( country_trend )
----> 6 ols_model2 = smf.ols(formula_model2, data=repdata).fit(cov_type='cluster', cov_kwds={'groups': repdata['ccode']})
      8 print(ols_model2.summary())
     10 rmse_ol2 = round(mean_squared_error( y, ols_model2.predict())**0.5,2)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\statsmodels\base\model.py:200, in Model.from_formula(cls, formula, data, subset, drop_cols, *args, **kwargs)
    197 if missing == 'none':  # with patsy it's drop or raise. let's raise.
    198     missing = 'raise'
--> 200 tmp = handle_formula_data(data, None, formula, depth=eval_env,
    201                           missing=missing)
    202 ((endog, exog), missing_idx, design_info) = tmp
    203 max_endog = cls._formula_max_endog
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\statsmodels\formula\formulatools.py:63, in handle_formula_data(Y, X, formula, depth, missing)
     61 else:
     62     if data_util._is_using_pandas(Y, None):
---> 63         result = dmatrices(formula, Y, depth, return_type='dataframe',
     64                            NA_action=na_action)
     65     else:
     66         result = dmatrices(formula, Y, depth, return_type='dataframe',
     67                            NA_action=na_action)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:309, in dmatrices(formula_like, data, eval_env, NA_action, return_type)
    299 """Construct two design matrices given a formula_like and data.
    300 
    301 This function is identical to :func:`dmatrix`, except that it requires
   (...)
    306 See :func:`dmatrix` for details.
    307 """
    308 eval_env = EvalEnvironment.capture(eval_env, reference=1)
--> 309 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
    310                                   NA_action, return_type)
    311 if lhs.shape[1] == 0:
    312     raise PatsyError("model is missing required outcome variables")
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:164, in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
    162 def data_iter_maker():
    163     return iter([data])
--> 164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
    165                                   NA_action)
    166 if design_infos is not None:
    167     return build_design_matrices(design_infos, data,
    168                                  NA_action=NA_action,
    169                                  return_type=return_type)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\highlevel.py:62, in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
     55         raise PatsyError(
     56             "On Python 2, formula strings must be either 'str' objects, "
     57             "or else 'unicode' objects containing only ascii "
     58             "characters. You passed a unicode string with non-ascii "
     59             "characters. I'm afraid you'll have to either switch to "
     60             "ascii-only, or else upgrade to Python 3.")
     61 if isinstance(formula_like, str):
---> 62     formula_like = ModelDesc.from_formula(formula_like)
     63     # fallthrough
     64 if isinstance(formula_like, ModelDesc):
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\desc.py:164, in ModelDesc.from_formula(cls, tree_or_string)
    162     tree = tree_or_string
    163 else:
--> 164     tree = parse_formula(tree_or_string)
    165 value = Evaluator().eval(tree, require_evalexpr=False)
    166 assert isinstance(value, cls)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\parse_formula.py:146, in parse_formula(code, extra_operators)
    144 operators = _default_ops + extra_operators
    145 operator_strings = [op.token_type for op in operators]
--> 146 tree = infix_parse(_tokenize_formula(code, operator_strings),
    147                    operators,
    148                    _atomic_token_types)
    149 if not isinstance(tree, ParseNode) or tree.type != "~":
    150     tree = ParseNode("~", None, [tree], tree.origin)
File ~\anaconda3\envs\entorno_geopandas\lib\site-packages\patsy\infix_parser.py:221, in infix_parse(tokens, operators, atomic_types, trace)
    218     print("End of token stream")
    220 if want_noun:
--> 221     raise PatsyError("expected a noun, but instead the expression ended",
    222                         c.op_stack[-1].token.origin)
    224 while c.op_stack:
    225     if c.op_stack[-1].op.token_type == Token.LPAREN:
PatsyError: expected a noun, but instead the expression ended
    war_prio ~ GPCP_g + GPCP_g_l + C(ccode) +
                                            ^

In [61]:


# Lista de explicativa a mostrarse en la tabla

explicativas = ['GPCP_g','GPCP_g_l']

# etiquetas a las variables 

etiquetas = ['Growth in rainfall, t','Growth in rainfall, t-1']


labels = dict(zip(explicativas,etiquetas))
labels

Out[61]:

{'GPCP_g': 'Growth in rainfall, t', 'GPCP_g_l': 'Growth in rainfall, t-1'}

In [62]:


pystout(models = [ols_model1,ols_model2], file='regression_table.tex', digits=3,
        endog_names=['Civil Conflict 25 Deaths (OLS)','Civil Conflict 1,000 Deaths'],
        exogvars =explicativas ,  # sellecionamos las variables 
        varlabels = labels,  # etiquetas a las variables
        mgroups={'Ordinary Least Squares':[1,5]}, # titulo a las regresiones
        modstat={'nobs':'Observarions','rsquared':'R\sym{2}'}, # estadísticos 
        addrows={'Country fixed effects':['yes','yes'], 'Country-specific time trends' :
                 ['yes','yes'],
                 'Root mean square error': [rmse_ol1,rmse_ol2]}, # añadimos filas 
        addnotes=['Note.—Huber robust standard errors are in parentheses.',
                  'Regression disturbance terms are clustered at the country level.',
                 'A country-specific year time trend is included in all specifications (coefficient estimates not reported).',
                 '* Significantly different from zero at 90 percent confidence.',
                 '** Significantly different from zero at 95 percent confidence.',
                 '* Significantly different from zero at 99 percent confidence.'],
        title='Rainfall and Economic Growth',
        stars={.1:'',.05:'',.01:'**'}
       )

Out[62]:

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In [62], line 12
# In[137]:
pystout(models = [ols_model1,ols_model2], file='regression_table.tex', digits=3,
       endog_names=['Civil Conflict 25 Deaths (OLS)','Civil Conflict 1,000 Deaths'],
       exogvars =explicativas ,  # sellecionamos las variables 
       varlabels = labels,  # etiquetas a las variables
       mgroups={'Ordinary Least Squares':[1,5]}, # titulo a las regresiones
       modstat={'nobs':'Observarions','rsquared':'R\sym{2}'}, # estadísticos 
       addrows={'Country fixed effects':['yes','yes'], 'Country-specific time trends' :
                ['yes','yes'],
---> 12                  'Root mean square error': [rmse_ol1,rmse_ol2]}, # añadimos filas 
       addnotes=['Note.—Huber robust standard errors are in parentheses.',
                 'Regression disturbance terms are clustered at the country level.',
                'A country-specific year time trend is included in all specifications (coefficient estimates not reported).',
                '* Significantly different from zero at 90 percent confidence.',
                '** Significantly different from zero at 95 percent confidence.',
                '* Significantly different from zero at 99 percent confidence.'],
       title='Rainfall and Economic Growth',
       stars={.1:'',.05:'',.01:'**'}
      )
NameError: name 'rmse_ol1' is not defined

In [ ]:

pip install statsmodelsr es

In [24]:

#Pregunta3. Geopandas para la Mita

In [25]:

#Establecemos las coordenadas del Norte
prueba1 = gpd.read_file(r"C:\Users\Usuario\Documents\GitHub\1ECO35_2022_2\Trabajo_final\datos\Mita\huan_line.shp")
prueba1

Out[25]:

In [26]:

#Gnereamos gráfico del Norte
prueba1.to_crs(epsg=4326,inplace=True)
prueba1.plot(color="black")

Out[26]:

<AxesSubplot: >

In [27]:

#Establecemos las coordenadas del centro
prueba2 = gpd.read_file(r"C:\Users\Usuario\Documents\GitHub\1ECO35_2022_2\Trabajo_final\datos\Mita\MitaBoundary.shp")
prueba2

Out[27]:

In [28]:

#Gnereamos gráfico del centro 
prueba2.to_crs(epsg=4326,inplace=True)
prueba2.plot(color="gray")

Out[28]:

<AxesSubplot: >

In [29]:

#Establecemos las coordenadas del Sur
prueba3 = gpd.read_file(r"C:\Users\Usuario\Documents\GitHub\1ECO35_2022_2\Trabajo_final\datos\Mita\pot_line.shp")
prueba3

Out[29]:

In [30]:

#Gnereamos gráfico del Sur
prueba3.to_crs(epsg=4326,inplace=True)
prueba3.plot(color="black")

Out[30]:

<AxesSubplot: >

In [32]:

#Planteamos las dimensiones del gráfico 
f, ax = plt.subplots(figsize=(12,12)) 

prueba1['geometry'].plot(color='blue', edgecolor='black', zorder=0.5, ax = ax) 
#Introducimos la primera línea en nuestro mapa de color negro  
prueba2['geometry'].plot(color = 'yellow', edgecolor='gold', zorder=0.5, ax = ax, label="Study Boundary")    
#Introducimos la segunda línea en nuestro mapa de color gris con borde blanco y le añadimos la etiqueta "Study Boundary"
prueba3['geometry'].plot(color = 'blue', edgecolor='black', zorder=0.5, ax = ax, label="Mita Boundary")
#Introducimos la tercera línea en nuestro mapa de color negro y le añadimos la etiqueta "Mita Boundary". 
#Solo ponemos esta e intepretaremos el color como indicador de prueba 1 y prueba 3 

# Borramos los valores de los ejes

plt.xticks([])
plt.yticks([])

#Añadimos los nombres siguiendo las cooredenadas encontradas con prueba y error. 

# Añadimos Huancavelica 
f.text(0.21,0.72,'Huancavelica',color = 'black', size = 11,
        bbox=dict(facecolor='none', edgecolor='none', pad=6.0)) 

# Añadimos Potosí 
f.text(0.76,0.30,'Potosí',color = 'black', size = 11,
        bbox=dict(facecolor='none', edgecolor='none', pad=6.0)) 

# Añadimos Uyuni Salt Flat
f.text(0.62,0.24,'Uyuni Salt Flat',color = 'black', size = 11,
        bbox=dict(facecolor='none', edgecolor='none', pad=6.0)) 

# Añadimos un mapa de fondo que no se parece mucho al original. Pero, de cualquier forma, nos sirve para darle algo de color
# al trabajo. El original se creó con altitudos y por este motivo no podemos generarlo. 

cx.add_basemap(ax, crs="EPSG:4326", attribution = False)


# Finalmente, agregamos la leyenda que considera las etiquetas previamente puestas en el perimetro de la mita y de estudio.

plt.legend(loc='upper left',
           title = "",frameon=True,
            bbox_to_anchor=(0, 0.15), prop={'size': 12})
plt.savefig("mapa2.png")

Out[32]:

In [ ]:

Product

Resources

Company