Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG6/Grupo_3_py_Ejercicio1.py
2714 views
1
#!pip install weightedcalcs
2
3
import os # for the username and for setting the working directory
4
import pandas as pd
5
import numpy as np
6
from tqdm import tqdm # controlar el tiempo en un loop
7
8
9
# Login name of the current OS user.
# NOTE(review): `user` is not referenced again in the visible script; it was
# presumably meant to be interpolated into the path below -- confirm.
user = os.getlogin()

# Make the ENAHO data folder the working directory. The path is a fixed
# literal, so a plain string is used (the previous f-string had no
# placeholders to fill in).
os.chdir("C:/Users/oscar/OneDrive/Desktop/PARCIAL MADATA/enahodf")  # Set working directory

" AÑO 2020"
# ---- Year 2020, module 01 ---------------------------------------------------

# The same .dta file provides both the data and the label metadata.
path_mod01_2020 = r"C:/Users/oscar/OneDrive/Desktop/PARCIAL MADATA/enahodf/datos/2020/737-Modulo01/737-Modulo01/enaho01-2020-100.dta"

# Read with pandas' default categorical conversion (value labels applied).
# NOTE(review): `enaho_2020` is not referenced again in the visible script, so
# this full read may be unnecessary -- confirm before removing.
enaho_2020 = pd.read_stata(path_mod01_2020)

# Working copy with the raw numeric codes (no categorical conversion).
enaho01 = pd.read_stata(path_mod01_2020, convert_categoricals=False)

# Open the file as a StataReader only to pull its label metadata. The `with`
# block guarantees the file handle is closed; the label dictionaries are kept
# in variables because `labels01` is a closed reader once the block exits.
with pd.read_stata(path_mod01_2020,
                   convert_categoricals=False, iterator=True) as labels01:
    variable_labels01 = labels01.variable_labels()   # column name -> description
    value_labels01 = labels01.value_labels()         # label set -> {code: text}

# Names of the value-label sets available in the file.
value_label_names01 = list(value_labels01.keys())

# Code -> text mapping of the 'p110' label set (KeyError if it is absent).
p110_labels01 = value_labels01['p110']

"Modulo 34 Sumaria 2020"
# ---- Year 2020, module 34 (Sumaria) -----------------------------------------

path_sumaria_2020 = r"C:/Users/oscar/OneDrive/Desktop/PARCIAL MADATA/enahodf/datos/2020/737-Modulo34/737-Modulo34/sumaria-2020.dta"

# Data with the raw numeric codes (no categorical conversion).
enaho34 = pd.read_stata(path_sumaria_2020, convert_categoricals=False)

# Open the file as a StataReader only to pull its variable labels. The `with`
# block guarantees the file handle is closed; the labels are kept in a
# variable because `labels34` is a closed reader once the block exits.
with pd.read_stata(path_sumaria_2020,
                   convert_categoricals=False, iterator=True) as labels34:
    variable_labels34 = labels34.variable_labels()   # column name -> description

"Merge 01 + 34"
# Household-level merge for 2020: module 01 joined with every module in `num`.

num = ["34"]

# Explicit module-code -> DataFrame table. Looking frames up by name through
# globals() hides which frame is actually being merged.
modules_2020 = {"34": enaho34}

merge_hog2020 = enaho01

for i in tqdm(num):
    # Left join on the household identifiers. Columns present in both frames
    # keep their module-01 name and get a '_y' suffix for the merged module;
    # validate="1:1" makes pandas raise if the keys are duplicated on either side.
    merge_hog2020 = pd.merge(
        merge_hog2020,
        modules_2020[i],
        on=["conglome", "vivienda", "hogar"],
        how="left",
        suffixes=('', '_y'),
        validate="1:1",
    )

"AÑO 2019"
# ---- Year 2019, module 01 ---------------------------------------------------

path_mod01_2019 = r"C:/Users/oscar/OneDrive/Desktop/PARCIAL MADATA/enahodf/datos/2019/687-Modulo01/687-Modulo01/enaho01-2019-100.dta"

# Data with the raw numeric codes (no categorical conversion).
enaho0119 = pd.read_stata(path_mod01_2019, convert_categoricals=False)

# Open the file as a StataReader only to pull its label metadata. The `with`
# block guarantees the file handle is closed; the label dictionaries are kept
# in variables because `labels0119` is a closed reader once the block exits.
with pd.read_stata(path_mod01_2019,
                   convert_categoricals=False, iterator=True) as labels0119:
    variable_labels0119 = labels0119.variable_labels()   # column name -> description
    value_labels0119 = labels0119.value_labels()         # label set -> {code: text}

# Names of the value-label sets available in the file.
value_label_names0119 = list(value_labels0119.keys())

# Code -> text mapping of the 'p110' label set (KeyError if it is absent).
p110_labels0119 = value_labels0119['p110']

"Modulo 34 sumaria 2019"
# ---- Year 2019, module 34 (Sumaria) -----------------------------------------

path_sumaria_2019 = r"C:/Users/oscar/OneDrive/Desktop/PARCIAL MADATA/enahodf/datos/2019/687-Modulo34/687-Modulo34/sumaria-2019.dta"

# Data with the raw numeric codes (no categorical conversion).
enaho3419 = pd.read_stata(path_sumaria_2019, convert_categoricals=False)

# Open the file as a StataReader only to pull its variable labels. The `with`
# block guarantees the file handle is closed; the labels are kept in a
# variable because `labels3419` is a closed reader once the block exits.
with pd.read_stata(path_sumaria_2019,
                   convert_categoricals=False, iterator=True) as labels3419:
    variable_labels3419 = labels3419.variable_labels()   # column name -> description

# Household-level merge for 2019: module 01 joined with every module in `num`.
num = ["34"]

# Explicit module-code -> DataFrame table for 2019. The previous lookup,
# globals()[f'enaho{i}'], resolved to `enaho34`, which is the 2020 Sumaria,
# so the 2019 households were being joined to 2020 data.
modules_2019 = {"34": enaho3419}

merge_hog2019 = enaho0119

for i in tqdm(num):
    # Left join on the household identifiers. Columns present in both frames
    # keep their module-01 name and get a '_y' suffix for the merged module;
    # validate="1:1" makes pandas raise if the keys are duplicated on either side.
    merge_hog2019 = pd.merge(
        merge_hog2019,
        modules_2019[i],
        on=["conglome", "vivienda", "hogar"],
        how="left",
        suffixes=('', '_y'),
        validate="1:1",
    )

"Append"
# Stack the 2020 and 2019 household tables into one frame with a fresh
# 0..n-1 index. `DataFrame.append` was removed in pandas 2.0; `pd.concat`
# is the supported equivalent and gives the same result.
merge_append = pd.concat([merge_hog2020, merge_hog2019], ignore_index=True)

"INGRESO Y GASTO MENNSUAL"
# Monthly income and expenditure: each household total is divided by
# 12 * mieperho.
# NOTE(review): presumably inghog1d / gashog2d are annual household totals
# and mieperho is the number of household members -- confirm against the
# ENAHO Sumaria dictionary.
months_times_members = 12 * merge_append["mieperho"]

merge_append["ingreso_month"] = merge_append["inghog1d"] / months_times_members

merge_append["gasto_month"] = merge_append["gashog2d"] / months_times_members

"DEFLACTOR ESPACIAL Y TEMPORAL"

"ESPACIAL"
# Spatial deflation: divide by the deflator `ld`.
# NOTE(review): this line used to multiply by `ld`. Division is used because
# the script's own instructions say to divide by LD and `gasto_month_def3`
# is computed as gasto_month / ld -- confirm against the assignment.
merge_append["gasto_month_defl"] = merge_append["gasto_month"] / merge_append["ld"]

"TEMPORAL"
# Temporal deflation: attach the deflator table to every household row.

deflactor = pd.read_stata(r"C:/Users/oscar/OneDrive/Desktop/PARCIAL MADATA/enahodf/datos/2020/737-Modulo34/737-Modulo34/Gasto2020/Bases/deflactores_base2020_new.dta",
                          convert_categoricals=False)

# NOTE: this is a second name for the same DataFrame, not a copy, so the
# 'dpto' and 'aniorec' columns added below also show up in `merge_base`.
merge_base = merge_append

# Merge keys: 'dpto' is the first two characters of `ubigeo`, 'aniorec' is
# the survey-year column; both are cast to int before joining.
# NOTE(review): the int cast presumably matches the key dtypes in the
# deflator table -- confirm.
merge_append['dpto'] = merge_append["ubigeo"].str.slice(0, 2).astype(int)
merge_append['aniorec'] = merge_append['aÑo'].astype(int)

# Inner join: rows without a matching (dpto, aniorec) deflator are dropped.
# Only non-default arguments are passed; in particular the `copy=` keyword
# is deprecated in recent pandas releases.
merge_append = merge_append.merge(
    deflactor,
    how='inner',
    left_on=['dpto', 'aniorec'],
    right_on=['dpto', 'aniorec'],
)

"DIVIDA POR mieperho, 12, LD e i00"
# For both monthly variables, build four derived columns, each dividing the
# monthly value by exactly one divisor.
# NOTE(review): `ingreso_month` and `gasto_month` are already divided by
# 12 * mieperho, and every column below applies a single divisor rather than
# all four together -- confirm this is the intended definition.

# Column suffix -> divisor applied to the monthly value.
divisors = {
    "_def": merge_append["mieperho"],
    "_def2": 12,
    "_def3": merge_append["ld"],
    "_def4": merge_append["i00"],
}

for base_col in ("ingreso_month", "gasto_month"):
    for suffix, divisor in divisors.items():
        merge_append[base_col + suffix] = merge_append[base_col] / divisor

print(merge_append)

"Groupby"

# 2020 module 02, read with the raw numeric codes.
path_mod02_2020 = r"C:/Users/oscar/OneDrive/Desktop/PARCIAL MADATA/enahodf/datos/2020/737-Modulo02/737-Modulo02/enaho01-2020-200.dta"
enaho02 = pd.read_stata(path_mod02_2020, convert_categoricals=False)

# Group the rows by 'p208a' and keep the 'hogar' column of the first rows of
# each group (GroupBy.head() defaults to 5 rows per group). This selects
# rows; it does not aggregate.
rows_by_p208a = enaho02.groupby(['p208a'])
hogar_by_p208a = rows_by_p208a[['hogar']]
groupby = hogar_by_p208a.head()