CoCalc -- script

GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG3/Solucion/script_py.py
⁴⁵⁸⁷ views
1
# -*- coding: utf-8 -*-
2
"""
3

4
@author: Roberto
5
"""
6

7

8
import os # for usernanme y set direcotrio
9
import pandas as pd
10
import numpy as np
11

12

13
# Working directory: the script expects the repository to live under the
# current user's Documents/GitHub folder on a Windows machine.
user = os.getlogin()   # login name of the current user
directorio = f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2/Trabajo_grupal/WG3/Solucion"
os.chdir(directorio)


# a. Load the Excel data set (the path is relative to the working directory
#    set above).
junin_data = pd.read_excel(r"../../../data/Region_Junin.xlsx")


# 1. Variable names
junin_data.columns


# 2. Variable types and main summary statistics
junin_data.info()
junin_data.describe()


# 3. Number of missing values in each column
junin_data.isna().sum()


# 4. Rename the variables
nuevos_nombres = {
    'Place': 'comunidad',
    'men_not_read': 'homxlee',
    'women_not_read': 'mujerxlee',
    'total_not_read': 'totalxlee',
}
junin_data = junin_data.rename(columns = nuevos_nombres)


# 5. Unique values of the variables comunidad and District
junin_data['comunidad'].unique()
junin_data['District'].unique()


# 6. Percentage variables
#    var1 / var2: share of 'mujerxlee' / 'homxlee' over 'totalxlee', in percent.
total_no_lee = junin_data['totalxlee']
junin_data['var1'] = (junin_data['mujerxlee'] / total_no_lee) * 100
junin_data['var2'] = (junin_data['homxlee'] / total_no_lee) * 100

#    var3: 'natives' as a percentage of the sum of three population columns.
# NOTE(review): the denominator adds peruvian_men, peruvian_women and
# foreign_men only -- confirm whether a foreign-women column should be part
# of the total as well.
denominador = (
    junin_data['peruvian_men']
    + junin_data['peruvian_women']
    + junin_data['foreign_men']
)
junin_data['var3'] = (junin_data['natives'] / denominador) * 100


# 7. Build the final data set: keep only the selected districts, and only the
#    rows where both 'natives' and 'mestizos' are strictly positive.
distritos = [
    "Ciudad del Cerro",
    "Jauja",
    "Acolla",
    "San Gerónimo",
    "Tarma",
    "Oroya",
    "Concepción",
]
en_distritos = junin_data["District"].isin(distritos)
con_nativos_y_mestizos = (junin_data["natives"] > 0) & (junin_data["mestizos"] > 0)

junin_data = junin_data[en_distritos & con_nativos_y_mestizos]

# Export the filtered data set as CSV.
junin_data.to_csv("../../../data/data.csv")
88

89

90

91
# Reescalar vector y matriz 
92

93

94
def escalar(x):
    """Rescale the values of ``x`` linearly to the interval [0, 1].

    Each value is transformed as ``(value - minimum) / (maximum - minimum)``.

    Parameters
    ----------
    x : array-like
        One-dimensional collection of numbers. Plain lists and tuples are
        converted to a NumPy array first; objects that already support
        element-wise arithmetic (NumPy arrays, pandas Series) are used as
        they are, so the result keeps their type.

    Returns
    -------
    Same container kind as ``x`` (a NumPy array for lists/tuples) holding the
    rescaled values.

    Raises
    ------
    ValueError
        If ``x`` is empty (there is no minimum or maximum to take).

    Notes
    -----
    When every value in ``x`` is the same, the range is zero and the division
    yields NaN values.
    """
    # Lists and tuples do not support ``x - scalar``; turn them into arrays.
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)

    # Compute each extreme once instead of rescanning the data per use.
    minimo = np.min(x)
    maximo = np.max(x)

    return (x - minimo) / (maximo - minimo)
97

98

99
# Rescale a vector. ``escalar`` needs the whole vector to find its minimum and
# maximum, so it is applied to the array itself; mapping it over the individual
# elements would call ``min`` on a single number, which is not iterable.
list( escalar(np.arange(50)) )


# Rescale a matrix column by column (axis 0): every column is mapped to [0, 1]
# using its own minimum and maximum.
matrix = np.random.randint(1,1000, 5000).reshape(100,50)

np.apply_along_axis(lambda x: (x-x.min())/(x.max() - x.min()), 0, matrix)
107

108

109
# 3. Keywords en python 
110

111

112
def keywords( *list_vars, **kwargs):
    """Apply the transformation named by ``function`` to every positional argument.

    Parameters
    ----------
    *list_vars
        The vectors to transform; each one is processed independently.
    **kwargs
        Must contain the key ``function`` with one of these values:
        ``"escalar"`` applies ``escalar`` to each vector;
        ``"estandarizar"`` subtracts the mean of each vector and divides by
        its standard deviation (``np.mean`` / ``np.std``).

    Returns
    -------
    list
        One transformed result per positional argument, in the same order.

    Raises
    ------
    KeyError
        If the ``function`` keyword is not supplied.
    ValueError
        If ``function`` is not one of the supported names.
    """
    nombre = kwargs[ 'function' ]   # raises KeyError when the keyword is missing

    if nombre == "escalar":
        return [escalar(vector) for vector in list_vars]

    if nombre == "estandarizar":
        return [(vector - np.mean(vector))/np.std(vector) for vector in list_vars]

    # Any other name is not a supported transformation.
    raise ValueError( f"La función {kwargs[ 'function' ]} no se identifica." )
134

135

136

137

138
# Example calls: run both supported transformations on the vector 0..9.
keywords(np.arange(10), function="estandarizar")   # standardise the vector

keywords(np.arange(10), function="escalar")        # rescale the vector
143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197
Product

Resources

Company