Path: blob/main/Trabajo_grupal/WG6/Grupo_3_py_Ejercicio1.py
2714 views
#!pip install weightedcalcs
"""Pool the 2019 and 2020 ENAHO household files and derive monthly amounts.

For each year the module 01 file (``enaho01-<year>-100.dta``) is left-merged
with the sumaria file (``sumaria-<year>.dta``) on the household keys, the two
years are stacked, monthly per-capita income and spending are computed, and
the deflator table is joined on department and year.

The reusable steps are plain functions; everything that touches the disk runs
only when the file is executed as a script.
"""

import os  # login name and working directory

import numpy as np
import pandas as pd

# Root folder of the survey data; every path below is built from it.
DATA_DIR = "C:/Users/oscar/OneDrive/Desktop/PARCIAL MADATA/enahodf"

ENAHO01_2020 = f"{DATA_DIR}/datos/2020/737-Modulo01/737-Modulo01/enaho01-2020-100.dta"
ENAHO02_2020 = f"{DATA_DIR}/datos/2020/737-Modulo02/737-Modulo02/enaho01-2020-200.dta"
SUMARIA_2020 = f"{DATA_DIR}/datos/2020/737-Modulo34/737-Modulo34/sumaria-2020.dta"
DEFLACTORES_2020 = (
    f"{DATA_DIR}/datos/2020/737-Modulo34/737-Modulo34"
    "/Gasto2020/Bases/deflactores_base2020_new.dta"
)
ENAHO01_2019 = f"{DATA_DIR}/datos/2019/687-Modulo01/687-Modulo01/enaho01-2019-100.dta"
SUMARIA_2019 = f"{DATA_DIR}/datos/2019/687-Modulo34/687-Modulo34/sumaria-2019.dta"

# Columns that identify one household in every module.
HOUSEHOLD_KEYS = ["conglome", "vivienda", "hogar"]


def merge_household_modules(base, modules, progress=None):
    """Left-merge each frame in *modules* onto *base* by household keys.

    Args:
        base: DataFrame holding one row per household.
        modules: iterable of DataFrames to attach, in order.
        progress: optional callable wrapping the iterable (e.g. ``tqdm``)
            to report progress; ``None`` iterates silently.

    Returns:
        The merged DataFrame. Columns already present in the left frame keep
        their name; clashing columns from the right frame get a ``_y`` suffix.

    Raises:
        pandas.errors.MergeError: if the keys are not unique on both sides
            (``validate="1:1"``).
    """
    merged = base
    for module in (modules if progress is None else progress(modules)):
        merged = pd.merge(
            merged,
            module,
            on=HOUSEHOLD_KEYS,
            how="left",
            suffixes=("", "_y"),
            validate="1:1",
        )
    return merged


def stack_years(*frames):
    """Stack yearly frames vertically and renumber the index from 0.

    Uses ``pd.concat`` because ``DataFrame.append`` no longer exists in
    pandas 2.x.
    """
    return pd.concat(frames, ignore_index=True)


def add_monthly_per_capita(df):
    """Add ``ingreso_month`` and ``gasto_month`` to *df* in place.

    Each is the annual amount (``inghog1d`` / ``gashog2d``) divided by
    12 times the household size ``mieperho``. Returns *df*.
    """
    months_times_members = 12 * df["mieperho"]
    df["ingreso_month"] = df["inghog1d"] / months_times_members
    df["gasto_month"] = df["gashog2d"] / months_times_members
    return df


def add_deflator_keys(df):
    """Add the integer join keys ``dpto`` and ``aniorec`` to *df* in place.

    ``dpto`` is the first two characters of ``ubigeo``; ``aniorec`` is the
    ``aÑo`` column cast to int. Returns *df*.
    """
    df["dpto"] = df["ubigeo"].str.slice(0, 2).astype(int)
    df["aniorec"] = df["aÑo"].astype(int)
    return df


def add_deflated_columns(df):
    """Add the ``*_def``, ``*_def2``, ``*_def3`` and ``*_def4`` columns in place.

    For both ``ingreso_month`` and ``gasto_month`` the four columns are the
    monthly amount divided by ``mieperho``, by 12, by ``ld`` and by ``i00``
    respectively. Returns *df*.
    """
    # NOTE(review): the monthly amounts are already divided by 12 * mieperho,
    # so "_def" and "_def2" divide a second time -- confirm this is intended.
    for source in ("ingreso_month", "gasto_month"):
        target = f"{source}_def"
        df[target] = df[source] / df["mieperho"]
        df[f"{target}2"] = df[source] / 12
        df[f"{target}3"] = df[source] / df["ld"]
        df[f"{target}4"] = df[source] / df["i00"]
    return df


if __name__ == "__main__":
    # Imported here so the helpers above stay importable without tqdm.
    from tqdm import tqdm  # progress bar for the merge loop

    # Current login name; the data paths are absolute and do not use it.
    user = os.getlogin()

    os.chdir(DATA_DIR)  # set the working directory

    # ---- Year 2020 -------------------------------------------------------
    enaho_2020 = pd.read_stata(ENAHO01_2020)
    enaho01 = pd.read_stata(ENAHO01_2020, convert_categoricals=False)

    # Label look-ups; the reader is closed when the block ends. The return
    # values are discarded unless these lines are run interactively.
    with pd.read_stata(
        ENAHO01_2020, convert_categoricals=False, iterator=True
    ) as labels01:
        labels01.variable_labels()
        labels01.value_labels().keys()
        labels01.value_labels()['p110']

    # Module 34 (sumaria) 2020
    enaho34 = pd.read_stata(SUMARIA_2020, convert_categoricals=False)
    with pd.read_stata(
        SUMARIA_2020, convert_categoricals=False, iterator=True
    ) as labels34:
        labels34.variable_labels()

    # Merge 01 + 34
    merge_hog2020 = merge_household_modules(enaho01, [enaho34], progress=tqdm)

    # ---- Year 2019 -------------------------------------------------------
    enaho0119 = pd.read_stata(ENAHO01_2019, convert_categoricals=False)
    with pd.read_stata(
        ENAHO01_2019, convert_categoricals=False, iterator=True
    ) as labels0119:
        labels0119.variable_labels()
        labels0119.value_labels().keys()
        labels0119.value_labels()['p110']

    # Module 34 (sumaria) 2019
    enaho3419 = pd.read_stata(SUMARIA_2019, convert_categoricals=False)
    with pd.read_stata(
        SUMARIA_2019, convert_categoricals=False, iterator=True
    ) as labels3419:
        labels3419.variable_labels()

    # The 2019 households must be paired with the 2019 sumaria, not the
    # 2020 one.
    merge_hog2019 = merge_household_modules(
        enaho0119, [enaho3419], progress=tqdm
    )

    # ---- Append both years -----------------------------------------------
    merge_append = stack_years(merge_hog2020, merge_hog2019)

    # ---- Monthly income and spending -------------------------------------
    add_monthly_per_capita(merge_append)

    # ---- Spatial deflator ------------------------------------------------
    # NOTE(review): this multiplies by ld; a deflator is usually applied by
    # dividing -- confirm against the exercise statement.
    merge_append["gasto_month_defl"] = merge_append["gasto_month"] * merge_append["ld"]

    # ---- Temporal deflator -----------------------------------------------
    deflactor = pd.read_stata(DEFLACTORES_2020, convert_categoricals=False)

    # Same object as merge_append (not a copy), so it also receives the
    # dpto / aniorec columns added next; merge_append is then rebound.
    merge_base = merge_append

    add_deflator_keys(merge_append)
    merge_append = merge_append.merge(
        deflactor, how="inner", on=["dpto", "aniorec"]
    )

    # ---- Divide by mieperho, 12, ld and i00 ------------------------------
    add_deflated_columns(merge_append)

    print(merge_append)

    # ---- Groupby ---------------------------------------------------------
    enaho02 = pd.read_stata(ENAHO02_2020, convert_categoricals=False)

    # First five 'hogar' rows of every p208a group.
    groupby = enaho02.groupby(['p208a'])[['hogar']].head()