CoCalc -- Lab4_spyder.py

GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Lab4/Lab4_spyder.py
²⁷¹⁰ views
1
# -*- coding: utf-8 -*-
2
"""
3
Loop replacement and Class
4

5
@author: Roberto
6
"""
7

8
import numpy as np
9
import pandas as pd
10
from pandas import DataFrame, Series
11
import statistics
12
import inspect  # Permite conocer los argumentos de una función , classes, etc 
13

14

15

16
#%% Loop replacement
17

18

19
vector = np.arange(100)
20

21
map( lambda x: np.sqrt(x) , vector) 
22

23
list( map( lambda x: np.sqrt(x) , vector)   )    
24

25

26
np.sqrt(vector)
27

28

29
'''
30
Ejemplos
31
'''
32

33

34
'''
35
Función 1
36
'''
37

38
def cube(x):
    """Return ``x * (1/3) - 0.5 * x`` for a scalar or NumPy array ``x``.

    NOTE(review): despite the name, no cube or cube root is computed here;
    the expression is linear in ``x``.  If ``x ** (1/3)`` was intended,
    confirm with the author before changing the formula.
    """
    return x * (1 / 3) - 0.5 * x
43

44
list( map( lambda x: cube(x) , vector)   )  
45

46
def sdv(x, mean, sd):
    """Standardize ``x``: subtract ``mean`` and divide the result by ``sd``.

    ``mean`` and ``sd`` are supplied by the caller; nothing is estimated
    from ``x`` inside this function.
    """
    return (x - mean) / sd
51

52
'''
53
Función 2, de estandarización
54
'''
55

56
map( lambda x, v1 = np.mean(vector), v2 = np.std(vector): sdv(x,v1, v2) , vector)
57

58
list( map( lambda x, v1 = np.mean(vector), v2 = np.std(vector): sdv(x,v1, v2) , vector)  )  
59

60

61
vector1 = (vector - np.mean(vector))/np.std(vector)
62

63

64
'''
65
Función 3, extrae los números de un texto
66

67
La función extrae los numeros de texto
68

69
'''
70
import re # Regex
71

72

73
# Sample labels of the form "<district name>: <number>".
texto_vector = np.array([
    "Municipio San Luis: 12450",
    "Municipio La victoria: 1450",
    "Municipio La Molina: 3550",
    "Municipio Ate: 506",
])

# Remove letters, hyphens, colons and spaces so only the digits remain.
# re.sub(pattern, replacement, text)
list(map(lambda x: re.sub('([a-z-A-Z])|(:)|[ ]', "", x), texto_vector))
78

79
'''
80
Función 4, If statement
81

82
A los valores menores a 50 se les asigna el número 1 y a los valores mayores o iguales a 50 se les asigna missing value
83
'''
84

85
vector = np.arange(100)
86

87
def function2(x):
    """Return 1 when ``x`` is below 50, otherwise ``np.nan`` (missing value)."""
    return 1 if x < 50 else np.nan
95

96
list( map( lambda x: function2(x) , vector)   )
97

98
# re.sub( patron de texto, sustitución, texto)
99

100
''' Loop replacement in Matrix '''
101

102
import numpy as np
103

104
np.random.seed(15632)
105
x1 = np.random.rand(500) # uniform distribution  [0,1]
106
x2 = np.random.rand(500) # uniform distribution [0,1]
107
x3 = np.random.rand(500) # uniform distribution [0,1]
108
x4 = np.random.rand(500) # uniform distribution [0,1]
109

110

111
X = np.column_stack((np.ones(500),x1,x2,x3,x4))
112

113
print(X.shape)
114

115
'''
116

117
Al aplicar funciones como mean, std, entre otras, se puede operar por filas o por columnas
118

119
axis = 0 se aplica la función a cada columna
120
axis = 1 se aplica la función por filas
121

122
Numpy apply for matrix
123

124
numpy.apply_along_axis(func1d, axis, arr, *args, **kwargs)
125
 
126
'''
127

128
# media y desviación estándar por columnas
129

130

131
np.mean(X, axis=0)   # axis = 0 (se aplica por columnas)
132
np.std(X, axis=0)
133

134
# media y desviación estándar por filas
135

136
np.mean(X, axis=1) 
137
len( np.mean(X, axis=1) )
138
np.std(X, axis=1)
139

140

141

142
'''
143

144
Tres formas de estandarizar una matriz 
145

146
'''
147
   
148
XNormed = (X - np.mean(X, axis=0))/np.std(X, axis=0)
149

150
X_std = np.apply_along_axis(lambda x, prom = 3, desv = 100: (x-prom)/desv,0, X)
151
  
152
    
153
X_std_1 = np.apply_along_axis(lambda x: (x-x.mean())/x.std(),0, X)
154

155
            
156
def standarize(x):
    """Center ``x`` on ``np.mean(x)`` and scale it by ``np.std(x)``.

    No guard is applied for a zero standard deviation: a constant input
    makes the denominator zero.
    """
    centered = x - np.mean(x)
    return centered / np.std(x)
160
   
161
X_std_2 = np.apply_along_axis(standarize,0, X)
162
    
163
# axis = 0, se aplicará la función a los elementos de cada columna
164

165
'''
166

167
Apply to DataFrame
168

169
'''
170
  
171
  
172
# Three numeric columns: two uniform random draws and a running index.
var1 = np.random.rand(50000)
var2 = np.arange(0, 50000)
var3 = np.random.rand(50000)

# Column name -> values mapping.  Named ``columns`` rather than ``dict`` so
# the built-in ``dict`` type is not shadowed for the rest of the script.
columns = {'v1': var1, 'v2': var2, 'v3': var3}

df = pd.DataFrame(columns)
181

182
df.apply(np.sum, axis = 0)  # columna por columna 
183
df.apply(np.sum, axis = 1)  # fila por fila
184

185
# Se genera el cuadrado de la variable v2
186

187
df['nueva_var'] = df['v2'].apply(lambda x : x**2)
188

189
# Cuadrado de los elementos de cada columna
190

191

192
df.apply(lambda x : x**2, axis = 0)
193

194

195
# Estandarización de los elementos de cada columna
196

197
'''
198
Lambda y la inclusión de la función
199
'''
200

201
df.apply(lambda row: (row - np.mean(row))/np.std(row), axis =0)
202

203
'''
204
Lambda y la función construida por separado
205
'''
206

207
def standarize(x):
    """Return ``(x - np.mean(x)) / np.std(x)``.

    Written as a standalone function so it can be handed directly to
    ``DataFrame.apply`` or wrapped in a lambda.  A zero standard deviation
    is not guarded against.
    """
    return (x - np.mean(x)) / np.std(x)
211

212
df.apply(standarize, axis = 0)
213

214

215
df.apply(lambda row: standarize(row), axis =0)
216

217
#%% Handle dataset 
218

219
'''
220
 We use US census data from the year 2012 to analyse the effect of gender 
221
 and interaction effects of other variables with gender on wage jointly.
222
 The dependent variable is the logarithm of the wage, the target variable is *female*
223
 (in combination with other variables). All other variables denote some other 
224
 socio-economic characteristics, e.g. marital status, education, and experience. 
225
 For a detailed description of the variables we refer to the help page.
226
 '''
227
 
228
import pyreadr  # Load R dataset
229
import os # for usernanme y set direcotrio
230

231
user = os.getlogin()   # Username
232

233
# Set directorio
234

235
os.chdir(f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2/Lab4") # Set directorio
236

237
cps2012_env = pyreadr.read_r("../data/cps2012.Rdata") # output formato diccionario
238

239

240
cps2012_env  # es un diccionario. En la llave "data" está la base de datos 
241
cps2012 = cps2012_env[ 'data' ] # extrae información almacenada en la llave data del diccionario cps2012_env
242
dt = cps2012.describe()
243
 
244
# Borrar variables constantes 
245

246
variance_cols = cps2012.var().to_numpy() # to numpy
247
X = cps2012.iloc[ : ,  np.where( variance_cols != 0   )[0] ]
248

249
# np.where( variance_cols != 0   ) devuelve la posición de las columnas con varianza != 0
250

251
np.where( variance_cols != 0   )[0] # array
252

253
# np.where() permite obtener la posición de columnas que cumplen la condición
254

255
# Retirar la media de las variables 
256

257
def demean(x):
    """Return ``x`` with its mean (``np.mean(x)``) subtracted."""
    return x - np.mean(x)
260

261
X = X.apply( demean, axis = 0 )  # axis :0 se aplica la función por columna
262

263
X.to_stata("../data/clean_data.dta", write_index = False)
264

265
# write_index = False evita que la base guardada genere una columna para el índice
266

267

268
#%% *args 
269

270
"""
271
The special syntax *args in function definitions in python is used to pass a variable number 
272
of arguments to a function. The object *args is a tuple that contains all the arguments.
273
 When you build your code, you should consider *args as a tuple.
274
"""
275

276
'''
277
*args : tipo tuple o array
278
'''
279

280
"Keyword: *args, incluir una cantidad variable de argumentos"
281

282

283
def calculator(*args):
    """Return the product, minimum and maximum of the positional arguments.

    Parameters
    ----------
    *args
        Any number of values; they arrive as a tuple and are converted to
        a NumPy array before aggregation.

    Returns
    -------
    tuple
        ``(product, minimum, maximum)`` as computed by ``np.prod``,
        ``np.min`` and ``np.max``.
    """
    print( f"args is a {type( args )}" )

    # np.array accepts the tuple directly; no intermediate list is needed.
    values = np.array(args)

    minimo = np.min(values)
    maximo = np.max(values)
    result = np.prod(values)

    return result, minimo, maximo
298

299

300
calculator( 8, 9, 50, 10, 12 ,15,20,100,120)
301

302
'''
303
*args se puede usar otro nombre siempre que se use * al inicio
304
'''
305

306

307
def calculator(*list_vars):
    """Return the product, minimum and maximum of the positional arguments.

    Shows that the variadic parameter may carry any name as long as it is
    prefixed with ``*``; ``list_vars`` is still a tuple.

    Returns
    -------
    tuple
        ``(product, minimum, maximum)``.
    """
    print( f"args is a {type( list_vars )}" )

    values = np.array(list_vars)  # the tuple of positional arguments

    return np.prod(values), np.min(values), np.max(values)
322

323

324
calculator( 8, 9, 50, 40, 10, 1)
325

326

327
#%%  *Kwargs
328

329

330
'''
331
**Kwargs is an acronym of keyword arguments. 
332
It works exactly like *Args but instead of accepting a variable number of positional arguments, 
333
it accepts a variable number of keyword or named arguments.
334
'''
335

336
'''
337
**kwargs: tipo diccionario 
338
'''
339

340
def calculator(*list_vars, **kwargs):
    """Aggregate the positional arguments with the operation named by ``function``.

    Parameters
    ----------
    *list_vars
        Values to aggregate; received as a tuple.
    **kwargs
        Must contain the key ``function``:
        ``"media"`` returns ``np.mean(list_vars)`` and
        ``"adicion"`` returns ``sum(list_vars)``.

    Raises
    ------
    KeyError
        If ``function`` is not passed.
    ValueError
        If ``function`` names an operation other than the two above.
    """
    print( type( list_vars ) )
    print( type( kwargs ) )

    # Look the operation up once instead of on every comparison.
    operation = kwargs[ 'function' ]

    if operation == "media":
        return np.mean( list_vars )

    if operation == "adicion":
        return sum(list_vars)

    # Any other name is rejected explicitly.
    raise ValueError( f"The function argument {operation} is not supported." )
359

360

361
calculator( 4, 5, 6, 7, 8, function = "adicion" )
362

363
calculator( 4, 5, 6, function = "media" )
364

365
calculator( 4, 5, 6, 7, 8, function = "inversa" )
366

367
calculator( np.arange(10), function = "media" )
368

369
'''
370
Example using dataset cps2012
371
'''
372

373

374

375
def transform(Data, *select, **function) -> pd.DataFrame:
    """Demean or standardize selected columns of a DataFrame.

    Parameters
    ----------
    Data
        DataFrame to read the columns from.
    *select
        Names of the columns to keep, in the order given.
    **function
        Must contain the key ``method``:
        ``"demean"`` subtracts each column's mean;
        ``"estandarize"`` additionally divides by each column's ``np.std``.

    Returns
    -------
    pd.DataFrame
        The transformed copy of the selected columns.

    Raises
    ------
    KeyError
        If ``method`` is not passed.
    ValueError
        If ``method`` is neither of the two supported names.  Without this
        check the function would fail on an unbound local variable instead.
    """
    # *select is a tuple; DataFrame column selection needs a list.
    Data_select = Data[list(select)]

    method = function['method']

    # axis=0 hands each column to the lambda in turn.
    if method == "demean":
        return Data_select.apply(lambda col: col - np.mean(col), axis=0)

    if method == "estandarize":
        return Data_select.apply(lambda col: (col - np.mean(col)) / np.std(col), axis=0)

    raise ValueError(f"The method argument {method} is not supported.")
389

390

391
transform(cps2012, "lnw", "exp1","exp2", method = "estandarize")
392

393

394

395
#%%  Class
396

397
class class_name:
    """Skeleton showing the minimal shape of a class definition."""

    def __init__(self, parameter1, parameter2):
        # Placeholder constructor: takes two parameters and stores nothing.
        pass
401
        
402
## Atributos
403

404
import numpy as np 
405

406
A = np.arange( 8, 25 )
407

408
print(A.size)
409
A.shape
410
A.mean()
411
        
412
dir(A)    # lista de atributos a partir de all, any, ...
413

414

415
"""
416
Method:
417
A function which is defined inside a class body. 
418
If called as an attribute of an instance of that class,
419
 the method will get the instance object as its first argument (which is usually called self). 
420
 See function and nested scope.
421
"""
422

423
from sklearn import linear_model
424
print(dir(linear_model))
425

426

427
#### __init__
428

429
class MyFirstClass:
    """Minimal example of a class with attributes and methods.

    Parameters
    ----------
    name, age
        Stored unchanged as the instance attributes ``name`` and ``age``.
    """

    def __init__( self, name, age ):
        self.name = name
        self.age = age

    # A method receives the instance as its first argument (``self``) and
    # reads constructor parameters through attributes stored in __init__.
    def print_name_1( self ):
        """Print ``I am <name>.``"""
        print( f'I am { self.name }.' )

    # Same pattern with a different message.
    def print_name_2(self):
        """Print ``This is my <name>.``"""
        print( f'This is my { self.name }.' )

    # Identical to print_name_2; kept so existing calls keep working.
    def print_name_3( self ):
        """Print ``This is my <name>.``"""
        print( f'This is my { self.name }.' )
448
        
449
student = MyFirstClass( name = "Jose" , age = 22)
450

451
'''
452
Recuperamos los parámetros 
453
'''
454

455
student.age
456
student.print_name_1()
457

458
    
459
class MyFirstClass:
    """Example class holding a person's name, age and school.

    Parameters
    ----------
    name, age, school
        Stored unchanged as instance attributes of the same names.
    """

    def __init__( self, name, age, school ):
        self.name = name
        self.age = age
        self.school = school

    def print_name_1( self ):
        """Print ``I am <name>.``"""
        print( f'I am { self.name }.' )

    def person_age( self ):
        """Print the person's name and age."""
        print( f' I am { self.name } , I am { self.age } old. ' )

    def person_school( self ):
        """Print the person's name and school."""
        print( f' I am {self.name} , I study at {self.school}. ' )

    # Same pattern as print_name_1 with a different message.
    def print_name_2(self):
        """Print ``This is my <name>.``"""
        print( f'This is my { self.name }.' )

    # Identical to print_name_2; kept so existing calls keep working.
    def print_name_3( self ):
        """Print ``This is my <name>.``"""
        print( f'This is my { self.name }.' )
486

487

488
student = MyFirstClass( name = "Jose" , age = 22, school = "Saco Oliveros" )
489

490
student.name
491
student.age
492

493
print(student.age)
494
print(student.school)
495
student.person_age()
496
student.print_name_1()
497

498
# Modificar propiedades directamente
499

500
student.age = "Pablo"
501

502
'''
503
OLS class
504
'''
505

506
W = [1,2,3]
507

508
from scipy.stats import t # t - student 
509

510
class OLS(object):
    """Ordinary least squares with an intercept, solved by the normal equations.

    Parameters
    ----------
    X
        Regressors; must offer ``.shape`` and ``.to_numpy()`` (a DataFrame
        in this script).  A column of ones is prepended internally.
    Y
        Dependent variable; must offer ``.to_numpy()``.
    W
        Accepted for backward compatibility and not used.  It is optional
        so that the two-argument call ``OLS(X, Y)`` works.
    """

    def __init__(self, X, Y, W=None):
        self.X = X
        self.Y = Y

    def Algebralineal(self):
        """Build the design matrix and estimate the coefficients.

        Sets ``n``, ``X1`` (intercept column plus ``X``), ``Y1`` (column
        vector), ``beta`` (shape ``(p, 1)``) and ``nk`` (residual degrees
        of freedom).
        """
        self.n = self.X.shape[0]

        X1 = np.column_stack((np.ones(self.n), self.X.to_numpy()))
        Y1 = self.Y.to_numpy().reshape(self.n, 1)
        self.X1 = X1
        self.Y1 = Y1

        self.beta = np.linalg.inv(X1.T @ X1) @ (X1.T @ Y1)

        # Degrees of freedom count every estimated coefficient, including
        # the intercept added above, hence the column count of X1.
        self.nk = self.n - X1.shape[1]

    def R2(self):
        """Return the coefficient of determination, ``1 - SCR / SCT``.

        Runs ``Algebralineal`` first and stores the residual sum of
        squares in ``self.SCR``.
        """
        self.Algebralineal()

        residuals = self.Y1 - self.X1 @ self.beta
        self.SCR = np.sum(np.square(residuals))
        SCT = np.sum(np.square(self.Y1 - np.mean(self.Y1)))

        return 1 - self.SCR / SCT

    def Table(self, **Kargs):
        """Return coefficients and standard errors in the requested container.

        Parameters
        ----------
        **Kargs
            Must contain ``Output``:
            ``"DataFrame"`` returns a DataFrame with columns ``OLS`` and
            ``standar_error``;
            ``"Diccionario"`` returns a dict with keys ``OLS``,
            ``standar_error`` and ``Pvalue``.

        Raises
        ------
        KeyError
            If ``Output`` is not passed.
        ValueError
            If ``Output`` is neither of the two supported names.
        """
        output = Kargs['Output']
        if output not in ("DataFrame", "Diccionario"):
            raise ValueError(f"The Output argument {output} is not supported.")

        # R2() already runs Algebralineal() and sets SCR.
        self.R2()

        sigma = self.SCR / self.nk
        Var = sigma * np.linalg.inv(self.X1.T @ self.X1)
        sd = np.sqrt(np.diag(Var))

        # Flatten beta from (p, 1) to (p,) before dividing by sd (p,);
        # otherwise broadcasting produces a p-by-p matrix of t statistics.
        beta = self.beta.flatten()
        t_est = np.absolute(beta / sd)

        # Two-sided p-value; sf is the survival function 1 - cdf.
        pvalue = 2 * t.sf(t_est, df=self.nk)

        if output == "DataFrame":
            return pd.DataFrame({"OLS": beta, "standar_error": sd})

        return {"OLS": beta, "standar_error": sd, "Pvalue": pvalue}
569
            
570

571
#flatten():  De multi array a simple array 
572

573

574
# Keep only the columns whose variance is non-zero (drops constant columns).
Dataset = cps2012.iloc[ : ,  np.where( variance_cols != 0   )[0] ]

X = Dataset.iloc[:,1:10]
Y = Dataset[['lnw']]

# OLS.__init__ is declared as (X, Y, W); pass the module-level W so the
# call matches the signature.
Reg1 = OLS(X, Y, W)

Reg1.X

Reg1.Algebralineal()
Reg1.X1

Reg1.R2()

# Each key of the dictionary output can be read on its own.
Reg1.Table(Output = "Diccionario")['OLS']
Reg1.Table(Output = "Diccionario")['Pvalue']
Reg1.Table(Output = "Diccionario")['standar_error']

Reg1.Table(Output = "DataFrame")
594

595
# Know arguments from function or class
596

597
inspect.getfullargspec(OLS)
598
inspect.getfullargspec(transform)
599

600
help(np)  # inspeccionar una liberia 
601

602

603
dir(OLS) # isnspeccionar metodos e instancias
604

605

606

607

608
Product

Resources

Company