Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG6/Grupo_9_py_Pregunta1.ipynb
2714 views
Kernel: Python 3 (ipykernel)
# Pregunta 1
# Grupo 9

from IPython.display import display, HTML

# Inject CSS that sets the width of the notebook, menu-bar and toolbar
# containers (selected by their element ids) for the rendered page.
display(HTML(data="""
<style>
    div#notebook-container    { width: 75%; }
    div#menubar-container     { width: 95%; }
    div#maintoolbar-container { width: 65%; }
</style>
"""))
#1.0 Set directorio import numpy as np import pandas as pd from pandas import DataFrame,Series
# 2.0 Load the ENAHO 2020 datasets from their Stata (.dta) files.

# Module 01 file (enaho01-2020-100.dta).
enaho01 = pd.read_stata(
    r"../../enaho//2020/737-Modulo01/737-Modulo01/enaho01-2020-100.dta"
)

# Module 34 file (sumaria-2020.dta).
sumaria = pd.read_stata(
    r"../../enaho/2020/737-Modulo34/737-Modulo34/sumaria-2020.dta"
)
# 3.0 Join the two 2020 tables on (conglome, vivienda, hogar).
# Columns present in both tables keep their name for the enaho01 side and
# receive a "_y" suffix for the sumaria side.
# NOTE(review): the original note said to keep only the intersection (the
# Stata `_merge == 3` case), but how="left" keeps every row of enaho01 and
# leaves NaN in the sumaria columns for unmatched rows -- confirm which join
# is intended.
enaho_2020 = enaho01.merge(
    sumaria,
    how="left",
    on=["conglome", "vivienda", "hogar"],
    suffixes=("", "_y"),
)
# 4.0 Keep only the columns used in the rest of the exercise.
enaho_2020 = enaho_2020.loc[
    :,
    [
        "conglome",
        "vivienda",
        "hogar",
        "ubigeo",
        "aÑo",
        "mieperho",
        "inghog1d",
        "gashog2d",
        "ld",
    ],
]
# Repeat steps 2.0-4.0 with the 2019 data.

# 2.0 Load the ENAHO 2019 datasets (this rebinds enaho01 and sumaria).
enaho01 = pd.read_stata(
    r"../../enaho/2019/687-Modulo01/687-Modulo01/enaho01-2019-100.dta"
)
sumaria = pd.read_stata(
    r"../../enaho/2019/687-Modulo34/687-Modulo34/sumaria-2019.dta"
)

# 3.0 Left-join on (conglome, vivienda, hogar); overlapping sumaria columns
# get the "_y" suffix.
enaho_2019 = enaho01.merge(
    sumaria,
    how="left",
    on=["conglome", "vivienda", "hogar"],
    suffixes=("", "_y"),
)

# 4.0 Keep the same columns as in the 2020 table.
enaho_2019 = enaho_2019.loc[
    :,
    [
        "conglome",
        "vivienda",
        "hogar",
        "ubigeo",
        "aÑo",
        "mieperho",
        "inghog1d",
        "gashog2d",
        "ld",
    ],
]
# 5.0 Stack the 2020 and 2019 tables into a single DataFrame (2020 rows first).
# pd.concat is used instead of DataFrame.append, which is deprecated and no
# longer available in pandas 2.0+.
# ignore_index=True builds a fresh 0..n-1 index so the row labels of the two
# inputs do not collide.
merge_append = pd.concat([enaho_2020, enaho_2019], ignore_index=True)
merge_append
C:\Users\acwe\AppData\Local\Temp\ipykernel_14456\1072011287.py:2: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. merge_append = enaho_2020.append(enaho_2019,
# Year: rename the "aÑo" column to "aniorec" and cast it to float.
# Per the original note, the float cast is required for the later merge on
# this column.
merge_append = merge_append.rename(columns={"aÑo": "aniorec"}).astype(
    {"aniorec": float}
)
merge_append["aniorec"]
0 2020.0 1 2020.0 2 2020.0 3 2020.0 4 2020.0 ... 97286 2019.0 97287 2019.0 97288 2019.0 97289 2019.0 97290 2019.0 Name: aniorec, Length: 97291, dtype: float64
# Department: take the first two characters of "ubigeo" as "dpto" and cast
# the result to float (per the original note, needed for the later merge).
merge_append = merge_append.assign(
    dpto=merge_append["ubigeo"].str[:2].astype(float)
)
merge_append.head(10)
# 6.0 Deflate variables
# 6.1 Load the table of deflators.
#   Spatial deflator: the "ld" column already present in the base data.
#   Temporal deflator: read from the file below (presumably the "i00"
#   column -- verify against the ENAHO methodology).
deflactor = pd.read_stata(
    r"../../enaho/2020/737-Modulo34/737-Modulo34/ConstVarGasto-Metodologia actualizada/Gasto2020/Bases/deflactores_base2020_new.dta",
)

# Keep only the merge keys and the deflator column.
deflactor = deflactor.loc[:, ["dpto", "aniorec", "i00"]]
deflactor
# 6.2 Attach the deflator to the stacked data by (dpto, aniorec).
# how="left" keeps every row of merge_append; validate="m:1" makes pandas
# raise if the key pair is not unique in the deflator table.
enaho = merge_append.merge(
    deflactor,
    how="left",
    on=["dpto", "aniorec"],
    validate="m:1",
)
enaho
# 7.0 Divide the income and expenditure variables by mieperho, 12, ld and i00.

# Common denominator: mieperho * ld * i00 * 12.
enaho["den"] = enaho["mieperho"].mul(enaho["ld"]).mul(enaho["i00"]).mul(12)

# ipc: per-capita income (monthly, per the original note).
enaho["ipc"] = enaho["inghog1d"].div(enaho["den"])

# gpc: per-capita expenditure (monthly, per the original note).
enaho["gpc"] = enaho["gashog2d"].div(enaho["den"])
# 8.0 Sort by year and then by (conglome, vivienda, hogar); the original
# note states this gives the same results as the R version.
enaho = enaho.sort_values(by=["aniorec", "conglome", "vivienda", "hogar"])
enaho