Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Lab4/Lab4_spyder.py
2710 views
1
# -*- coding: utf-8 -*-
2
"""
3
Loop replacement and Class
4
5
@author: Roberto
6
"""
7
8
import numpy as np
9
import pandas as pd
10
from pandas import DataFrame, Series
11
import statistics
12
import inspect # Permite conocer los argumentos de una función , classes, etc
13
14
15
16
#%% Loop replacement
17
18
19
# Three equivalent ways of taking the square root of every element.
vector = np.arange(100)

# 1) map() is lazy: this only builds an iterator, nothing is evaluated yet.
map(np.sqrt, vector)

# 2) Element by element, materialised as a Python list.
[np.sqrt(value) for value in vector]

# 3) Vectorised call on the whole array.
np.sqrt(vector)
27
28
29
'''
30
Ejemplos
31
'''
32
33
34
'''
35
Función 1
36
'''
37
38
def cube(x):
    """Return ``x*(1/3) - 0.5*x``.

    NOTE(review): despite the name, the formula is linear in ``x`` (a
    multiplication by one third, not a power) -- confirm this is intended.
    """
    third = x * (1 / 3)
    half = 0.5 * x
    return third - half
43
44
# Apply cube() to every element of the vector, without an explicit for-loop.
[cube(value) for value in vector]
45
46
def sdv(x, mean, sd):
    """Standardise ``x``: subtract ``mean`` and divide the result by ``sd``."""
    return (x - mean) / sd
51
52
'''
53
Función 2, de estandarización
54
'''
55
56
# Lazy version: the mean and the standard deviation are evaluated once, when
# the lambda is created, and frozen as default arguments.
map(
    lambda value, mu=np.mean(vector), sigma=np.std(vector): sdv(value, mu, sigma),
    vector,
)

# Same computation, materialised as a list.
list(
    map(
        lambda value, mu=np.mean(vector), sigma=np.std(vector): sdv(value, mu, sigma),
        vector,
    )
)

# Vectorised equivalent on the whole array.
vector1 = (vector - vector.mean()) / vector.std()
62
63
64
'''
65
Función 3, extrae los números de un texto
66
67
La función extrae los numeros de texto
68
69
'''
70
import re # Regex
71
72
73
# Labels of the form "<municipality name>: <number>".
texto_vector = np.array([
    "Municipio San Luis: 12450",
    "Municipio La victoria: 1450",
    "Municipio La Molina: 3550",
    "Municipio Ate: 506",
])

# re.sub(pattern, replacement, text): delete letters, hyphens, colons and
# blanks, so that only the remaining characters (the digits here) are kept.
[re.sub('([a-z-A-Z])|(:)|[ ]', "", text) for text in texto_vector]
78
79
'''
80
Función 3, If statement
81
82
Valores menores a 50: asignar el número 1; asignar missing values para valores mayores o iguales a 50
83
'''
84
85
vector = np.arange(100)


def function2(x):
    """Return 1 when ``x`` is below 50, otherwise NaN (a missing value)."""
    if not x < 50:
        return np.nan  # missing value
    return 1


# Element-wise application without an explicit for-loop.
[function2(value) for value in vector]
97
98
# re.sub( patron de texto, sustitución, texto)
99
100
''' Loop replacement in Matrix '''
101
102
import numpy as np
103
104
# Fix the seed so the four uniform samples below are reproducible.
np.random.seed(15632)

# Four regressors of 500 uniform draws each. They are generated in the order
# x1, x2, x3, x4, so the random stream is consumed exactly as in a
# line-by-line version.
x1, x2, x3, x4 = (np.random.rand(500) for _ in range(4))

# Design matrix: a leading column of ones followed by the four regressors.
X = np.column_stack([np.ones(500), x1, x2, x3, x4])

print(X.shape)
114
115
'''
116
117
En el caso de funciones como mean, std, entre otras, se pueden aplicar por filas o por columnas
118
119
axis = 0 se aplica la función a cada columna
120
axis = 1 se aplica la función por filas
121
122
Numpy apply for matrix
123
124
numpy.apply_along_axis(func1d, axis, arr, *args, **kwargs)
125
126
'''
127
128
# media y desviación estándar por columnas
129
130
131
# Mean and standard deviation of every column (axis=0 collapses the rows).
X.mean(axis=0)
X.std(axis=0)

# Mean and standard deviation of every row (axis=1 collapses the columns).
X.mean(axis=1)
len(X.mean(axis=1))  # one value per row
X.std(axis=1)
139
140
141
142
'''
143
144
Tres formas de estandarizar una matriz
145
146
'''
147
148
# 1) Broadcasting: the vector of column means / standard deviations is
#    stretched over the rows of X.
# NOTE(review): if X still holds the constant column of ones built earlier,
# its standard deviation is 0 and that column becomes NaN -- confirm.
XNormed = (X - X.mean(axis=0)) / X.std(axis=0)

# 2) apply_along_axis with a lambda; here the centre (3) and the scale (100)
#    are fixed defaults, not the moments of each column.
X_std = np.apply_along_axis(lambda col, prom=3, desv=100: (col - prom) / desv, 0, X)

# 3) apply_along_axis with a lambda that uses each column's own mean and std.
X_std_1 = np.apply_along_axis(lambda col: (col - col.mean()) / col.std(), 0, X)
154
155
156
def standarize(x):
    """Return ``x`` centred on its mean and divided by its standard deviation.

    Both moments are computed with ``np.mean`` and ``np.std`` on ``x`` itself.
    """
    centered = x - np.mean(x)
    return centered / np.std(x)
160
161
# Same standardisation, passing the named helper instead of a lambda;
# axis 0 hands the helper one column of X at a time.
X_std_2 = np.apply_along_axis(standarize,0, X)
162
163
# axis = 0, se aplicará la función a los elementos de cada columna
164
165
'''
166
167
Apply to DataFrame
168
169
'''
170
171
172
# list of name, degree, score
173
# Three toy variables with 50 000 observations each.
var1 = np.random.rand(50000)
var2 = np.arange(0, 50000)
var3 = np.random.rand(50000)

# Mapping column name -> values. It is deliberately not called ``dict``:
# binding that name at module level would shadow the builtin ``dict`` type
# for all the code that runs afterwards.
data_dict = {'v1': var1, 'v2': var2, 'v3': var3}

df = pd.DataFrame(data_dict)
181
182
df.apply(np.sum, axis="index")    # one total per column
df.apply(np.sum, axis="columns")  # one total per row

# Create the square of variable v2, element by element.
df['nueva_var'] = df['v2'].apply(lambda value: value ** 2)

# Square the elements of every column (the lambda receives a whole column).
df.apply(lambda column: column ** 2, axis="index")
193
194
195
# Estandarización de los elementos de cada columna
196
197
'''
198
Lambda y la inclusión de la función
199
'''
200
201
# With axis=0 the lambda receives one column at a time; each column is
# centred on its own mean and divided by its own standard deviation.
df.apply(lambda column: (column - np.mean(column)) / np.std(column), axis=0)
202
203
'''
204
Lambda y la función construida por separado
205
'''
206
207
def standarize(x):
    """Standardise ``x``: subtract ``np.mean(x)`` and divide by ``np.std(x)``."""
    mean, sd = np.mean(x), np.std(x)
    return (x - mean) / sd
211
212
# Pass the helper directly: it is called once per column.
df.apply(standarize, axis="index")

# Equivalent call with the helper wrapped in a lambda.
df.apply(lambda column: standarize(column), axis="index")
216
217
#%% Handle dataset
218
219
'''
220
We use US census data from the year 2012 to analyse the effect of gender
221
and interaction effects of other variables with gender on wage jointly.
222
The dependent variable is the logarithm of the wage, the target variable is *female*
223
(in combination with other variables). All other variables denote some other
224
socio-economic characteristics, e.g. marital status, education, and experience.
225
For a detailed description of the variables we refer to the help page.
226
'''
227
228
import pyreadr # Load R dataset
229
import os # for usernanme y set direcotrio
230
231
user = os.getlogin()  # name of the logged-in user

# Work from the Lab4 folder of the local clone so that the relative paths
# used below resolve.
os.chdir(f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2/Lab4")

# read_r returns a dictionary-like object.
cps2012_env = pyreadr.read_r("../data/cps2012.Rdata")

cps2012_env
# The data set is stored under the key "data".
cps2012 = cps2012_env['data']
dt = cps2012.describe()

# Drop constant variables: keep only the columns whose variance is not zero.
variance_cols = cps2012.var().to_numpy()
X = cps2012.iloc[:, np.nonzero(variance_cols != 0)[0]]

# Positions of the columns that satisfy the condition (variance != 0),
# returned as an array.
np.nonzero(variance_cols != 0)[0]
252
253
# np.where() permite obtener la posición de columnas que cumplen la condición
254
255
# Retirar la media de las variables
256
257
def demean(x):
    """Return ``x`` with its own mean subtracted from every element."""
    return x - np.mean(x)
260
261
# Remove the mean of every variable: demean is called once per column.
X = X.apply(demean, axis="index")

# Save the result as a Stata file, without a column for the index.
X.to_stata("../data/clean_data.dta", write_index=False)
264
265
# write_index = False: la base guardada no genera una columna para el índice
266
267
268
#%% *args
269
270
"""
271
The special syntax *args in function definitions in python is used to pass a variable number
272
of arguments to a function. The object *args is a tuple that contains all the arguments.
273
When you build your code, you should consider *args as a tuple.
274
"""
275
276
'''
277
*args : tipo tuple o array
278
'''
279
280
"Keyword: *args, incluir una cantidad variable de argumentos"
281
282
283
def calculator(*args):
    """Return ``(product, minimum, maximum)`` of the positional arguments.

    All positional arguments arrive packed in the tuple ``args``; its type is
    printed before the values are converted to a NumPy array.
    """
    print( f"args is a {type( args )}" )

    values = np.array(list(args))
    return np.prod(values), np.min(values), np.max(values)
298
299
300
# Nine positional arguments are packed into the *args tuple; the call
# returns (product, minimum, maximum).
calculator( 8, 9, 50, 10, 12 ,15,20,100,120)
301
302
'''
303
*args se puede usar otro nombre siempre que se use * al inicio
304
'''
305
306
307
def calculator(*list_vars):
    """Return ``(product, minimum, maximum)`` of the positional arguments.

    The starred parameter may have any name; here the tuple of positional
    arguments is called ``list_vars``.
    """
    print( f"args is a {type( list_vars )}" )

    values = np.array(list_vars)  # the tuple is converted to an array
    smallest = values.min()
    largest = values.max()
    return values.prod(), smallest, largest
322
323
324
# Six positional arguments collected by *list_vars; the call returns
# (product, minimum, maximum).
calculator( 8, 9, 50, 40, 10, 1)
325
326
327
#%% *Kwargs
328
329
330
'''
331
**Kwargs is an acronym of keyword arguments.
332
It works exactly like *Args but instead of accepting a variable number of positional arguments,
333
it accepts a variable number of keyword or named arguments.
334
'''
335
336
'''
337
**kwargs: tipo diccionario
338
'''
339
340
def calculator(*list_vars, **kwargs):
    """Aggregate the positional arguments as chosen by the ``function`` keyword.

    ``function="media"`` returns ``np.mean`` of the positional arguments and
    ``function="adicion"`` returns their ``sum``.

    Raises
    ------
    KeyError
        If the ``function`` keyword is not given.
    ValueError
        If ``function`` is neither "media" nor "adicion".
    """
    print(type(list_vars))  # positional arguments arrive as a tuple
    print(type(kwargs))     # keyword arguments arrive as a dict

    operation = kwargs['function']

    if operation == "media":
        return np.mean(list_vars)

    if operation == "adicion":
        return sum(list_vars)

    # Error message for an unsupported value of the argument.
    raise ValueError( f"The function argument {kwargs[ 'function' ]} is not supported." )
359
360
361
# Sum of the positional arguments.
calculator(4, 5, 6, 7, 8, function="adicion")

# Mean of the positional arguments.
calculator(4, 5, 6, function="media")

# "inversa" is not a supported option, so this call raises ValueError.
calculator(4, 5, 6, 7, 8, function="inversa")

# A single positional argument that is itself an array.
calculator(np.arange(10), function="media")
368
369
'''
370
Example using dataset cps2012
371
'''
372
373
374
375
def transform(Data, *select, **function) -> pd.DataFrame:
    """Demean or standardise the selected columns of ``Data``.

    Parameters
    ----------
    Data : pandas.DataFrame
        Source table.
    *select : str
        Names of the columns to keep and transform.
    **function
        Must contain ``method``: "demean" subtracts each column's mean,
        "estandarize" additionally divides by each column's ``np.std``.

    Returns
    -------
    pandas.DataFrame
        The selected columns, transformed column by column.

    Raises
    ------
    KeyError
        If the ``method`` keyword is not given.
    ValueError
        If ``method`` is neither "demean" nor "estandarize". Previously the
        function reached ``return X`` with ``X`` never assigned and failed
        with an UnboundLocalError.
    """
    columns = list(select)          # tuple of names -> list for column indexing
    Data_select = Data[columns]     # keep only the requested columns

    method = function['method']

    # axis=0: the lambda receives one column at a time.
    if method == "demean":
        return Data_select.apply(lambda col: col - np.mean(col), axis=0)

    if method == "estandarize":
        return Data_select.apply(lambda col: (col - np.mean(col)) / np.std(col), axis=0)

    raise ValueError(f"The method argument {method} is not supported.")
389
390
391
# The three column names are collected by *select and the keyword
# method = "estandarize" by **function.
transform(cps2012, "lnw", "exp1","exp2", method = "estandarize")
392
393
394
395
#%% Class
396
397
class class_name:
    """Bare class skeleton: the constructor takes two parameters and stores nothing."""

    def __init__(self, parameter1, parameter2):
        pass
401
402
## Atributos
403
404
import numpy as np
405
406
# A NumPy array exposes attributes (read without a call) and methods (called).
A = np.arange(8, 25)

print(A.size)  # attribute
A.shape        # attribute
A.mean()       # method

dir(A)  # list of the names available on the array: all, any, ...
413
414
415
"""
416
Method:
417
A function which is defined inside a class body.
418
If called as an attribute of an instance of that class,
419
the method will get the instance object as its first argument (which is usually called self).
420
See function and nested scope.
421
"""
422
423
from sklearn import linear_model
424
# Print the list of names that the linear_model module exposes.
print(dir(linear_model))
425
426
427
#### __init__
428
429
class MyFirstClass:
    """Minimal example class holding a ``name`` and an ``age``.

    The constructor stores both parameters as instance attributes, which is
    what lets every method below read them through ``self``.
    """

    def __init__(self, name, age):
        self.name, self.age = name, age

    def print_name_1(self):
        """Print a one-line introduction using the stored name."""
        message = f'I am { self.name }.'
        print(message)

    # NOTE(review): print_name_2 and print_name_3 have identical bodies and
    # both read the name through self, exactly like print_name_1.
    def print_name_2(self):
        """Print the stored name in a "This is my ..." sentence."""
        message = f'This is my { self.name }.'
        print(message)

    def print_name_3(self):
        """Print the same sentence as ``print_name_2``."""
        message = f'This is my { self.name }.'
        print(message)
448
449
student = MyFirstClass("Jose", 22)

# Read back the values given to the constructor: they are now attributes.
student.age
student.print_name_1()
457
458
459
class MyFirstClass:
    """Example class holding a ``name``, an ``age`` and a ``school``.

    The three constructor parameters are stored as instance attributes and
    every method only prints a sentence built from them.
    """

    def __init__(self, name, age, school):
        self.name, self.age, self.school = name, age, school

    def print_name_1(self):
        """Print a one-line introduction using the stored name."""
        message = f'I am { self.name }.'
        print(message)

    def person_age(self):
        """Print the stored name together with the stored age."""
        message = f' I am { self.name } , I am { self.age } old. '
        print(message)

    def person_school(self):
        """Print the stored name together with the stored school."""
        message = f' I am {self.name} , I study at {self.school}. '
        print(message)

    # NOTE(review): print_name_2 and print_name_3 have identical bodies and
    # both read the name through self, exactly like print_name_1.
    def print_name_2(self):
        """Print the stored name in a "This is my ..." sentence."""
        message = f'This is my { self.name }.'
        print(message)

    def print_name_3(self):
        """Print the same sentence as ``print_name_2``."""
        message = f'This is my { self.name }.'
        print(message)
486
487
488
student = MyFirstClass("Jose", 22, "Saco Oliveros")

# Attributes are read with dot notation.
student.name
student.age

print(student.age)
print(student.school)
student.person_age()
student.print_name_1()

# Attributes can be modified directly on the instance.
# NOTE(review): the age attribute is overwritten with a name ("Pablo") --
# confirm that student.name was not the intended target.
student.age = "Pablo"
501
502
'''
503
OLS class
504
'''
505
506
# NOTE(review): W is not read by any statement visible in this file; the OLS
# constructor declared below has a parameter with the same name -- confirm
# whether this value is meant to be passed to it.
W = [1,2,3]
507
508
from scipy.stats import t # t - student
509
510
class OLS(object):
    """Ordinary least squares with an intercept, solved with matrix algebra.

    Parameters
    ----------
    X : pandas.DataFrame
        Regressors, without a constant column (a column of ones is added
        internally).
    Y : pandas.DataFrame or pandas.Series
        Dependent variable with one value per row of ``X``.
    W : optional
        Accepted for backward compatibility and ignored. It used to be a
        required argument although no method reads it.
    """

    def __init__(self, X, Y, W=None):
        self.X = X
        self.Y = Y

    def Algebralineal(self):
        """Build the design matrix and estimate the coefficients.

        Sets ``n`` (observations), ``X1`` (design matrix with the constant),
        ``Y1`` (column vector), ``beta`` (coefficients, shape ``(p, 1)``) and
        ``nk`` (residual degrees of freedom).
        """
        self.n = self.X.shape[0]

        # DataFrame -> NumPy, with a leading column of ones for the intercept.
        X1 = np.column_stack((np.ones(self.n), self.X.to_numpy()))
        Y1 = self.Y.to_numpy().reshape(self.n, 1)

        self.X1 = X1
        self.Y1 = Y1
        self.beta = np.linalg.inv(X1.T @ X1) @ (X1.T @ Y1)

        # Residual degrees of freedom: observations minus ALL estimated
        # coefficients. The count is taken from X1 so that the intercept is
        # included (counting the columns of X alone leaves it out).
        self.nk = self.n - X1.shape[1]

    def R2(self):
        """Return the R-squared of the regression.

        Re-estimates the model and stores the residual sum of squares in
        ``SCR``.
        """
        self.Algebralineal()

        y_est = self.X1 @ self.beta
        error = self.Y1 - y_est
        self.SCR = np.sum(np.square(error))
        SCT = np.sum(np.square(self.Y1 - np.mean(self.Y1)))

        return 1 - self.SCR / SCT

    def Table(self, **Kargs):
        """Return the coefficients and their standard errors.

        Parameters
        ----------
        **Kargs
            Must contain ``Output``: "DataFrame" returns a DataFrame with the
            columns "OLS" and "standar_error"; "Diccionario" returns a dict
            with the keys "OLS", "standar_error" and "Pvalue".

        Raises
        ------
        KeyError
            If the ``Output`` keyword is not given.
        ValueError
            If ``Output`` is not one of the two supported values. Previously
            the method reached ``return df`` with ``df`` never assigned.
        """
        output = Kargs['Output']
        if output not in ("DataFrame", "Diccionario"):
            raise ValueError(f"The Output argument {output} is not supported.")

        # R2() already runs Algebralineal(), so one call refreshes everything.
        self.R2()

        sigma = self.SCR / self.nk  # estimated error variance
        Var = sigma * np.linalg.inv(self.X1.T @ self.X1)
        sd = np.sqrt(np.diag(Var))

        # beta has shape (p, 1) and sd has shape (p,). Dividing them directly
        # broadcasts to a (p, p) matrix, so beta is flattened first to obtain
        # one t statistic per coefficient.
        beta = self.beta.flatten()
        t_est = np.absolute(beta / sd)

        # Two-sided p-value; sf (= 1 - cdf) keeps precision for large t.
        pvalue = 2 * t.sf(t_est, df=self.nk)

        if output == "DataFrame":
            return pd.DataFrame({"OLS": beta, "standar_error": sd})

        return {"OLS": beta, "standar_error": sd, "Pvalue": pvalue}
569
570
571
#flatten(): De multi array a simple array
572
573
574
# Keep only the columns of the data set whose variance is not zero.
Dataset = cps2012.iloc[:, np.where(variance_cols != 0)[0]]

X = Dataset.iloc[:, 1:10]
Y = Dataset[['lnw']]

# The OLS constructor is declared as (X, Y, W). The call used to pass only
# X and Y, which raises TypeError for the missing argument, so the W defined
# earlier in this file is passed explicitly.
Reg1 = OLS(X, Y, W)

Reg1.X

Reg1.Algebralineal()
Reg1.X1

Reg1.R2()

Reg1.Table(Output="Diccionario")['OLS']
Reg1.Table(Output="Diccionario")['Pvalue']
Reg1.Table(Output="Diccionario")['standar_error']

Reg1.Table(Output="DataFrame")

# Inspect the arguments of a function or class.
inspect.getfullargspec(OLS)
inspect.getfullargspec(transform)

help(np)  # inspect a library

dir(OLS)  # inspect methods and attributes
604
605
606
607
608