GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG1/Grupo_8_py.Py
1
#%% Pregunta 1
2
3
#Creamos un vector cuyos datos estén entre 0 y 500 y contenga 20 datos.
4
5
import random
6
import numpy as np
7
import math
8
9
x = np.random.randint(0, 500, 20)
10
11
# Elaboramos una estructura If statement para la siguiente función
12
# y aplicamos condición a cada uno de los elementos
13
14
def calculator(x):
15
x = x
16
17
if 0<=x<100:
18
return f"F(X)= {x **(1/2)}"
19
elif 100<=x<300:
20
return f"F(X)={x-5}"
21
elif 300<=x:
22
return print( "F(X)=50")
23
24
print(calculator(300))
25
26
27
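
# The comment above says the condition is applied to each of the elements; a
# minimal sketch of that (using the calculator function defined above) is to
# evaluate the piecewise function over the whole vector with a comprehension.
results = [calculator(value) for value in x]
print(results)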

#%% Question 2

import numpy as np

# Create a vector "v" with 100 observations.
v = np.arange(0, 100)
print(v)

np.min(v)  # inspect the minimum of the vector
np.max(v)  # and its maximum

# Create a matrix "M" with values from 0 to 4999; "reshape" turns it into a 100x50 matrix.
M = np.arange(0, 5000).reshape(100, 50)
print(M)
M.shape   # check that M is a 100x50 matrix
type(v)
type(M)   # both are numpy.ndarray

print(np.min(M, axis=0))
print(np.max(M, axis=0))

# To rescale the columns of the matrix we need their column-wise minima and maxima.
X = np.min(M, axis=0)  # axis=0 because we look at the columns
Y = np.max(M, axis=0)
print(X)
X.shape

# Rescale the vector and the matrix (min-max scaling).
try:
    print((v - min(v)) / (max(v) - min(v)))  # the code is skipped if an error is detected
except TypeError:
    print("The argument should be a vector")

try:
    print((M - X) / (Y - X))  # the code is skipped if an error is detected
except TypeError:
    print("The argument should be a matrix")
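
# A minimal reusable sketch (not part of the original assignment): wrap the
# min-max rescaling above in a helper and check that the output lies in [0, 1].
def rescale(A, axis=None):
    """Min-max rescale an array along the given axis."""
    A_min = np.min(A, axis=axis)
    A_max = np.max(A, axis=axis)
    return (A - A_min) / (A_max - A_min)

print(rescale(v).min(), rescale(v).max())                   # expected: 0.0 1.0
print(rescale(M, axis=0).min(), rescale(M, axis=0).max())   # expected: 0.0 1.0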

#%% Question 3

# Import the required libraries:
import random
import pandas as pd
import numpy as np
from scipy.stats import t  # Student's t distribution

# Fix the seeds so the simulated data do not change for each sample size.
# The regressors below are drawn with numpy, so numpy's generator must be
# seeded as well; random.seed alone would not make them reproducible.
random.seed(100000)
np.random.seed(100000)

# Generate the regressors for a sample size of 10
x1 = np.random.rand(10)         # uniform distribution [0,1]
x2 = np.random.rand(10)         # uniform distribution [0,1]
x3 = np.random.rand(10)         # uniform distribution [0,1]
x4 = np.random.rand(10)         # uniform distribution [0,1]
e = np.random.normal(0, 1, 10)  # normal distribution, mean = 0 and sd = 1
z = np.random.rand(10)          # instrument

# Generate the regression equation (population regression, i.e. the data generating process, DGP)
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + e

X = np.column_stack((np.ones(10), x1, x2, x3, x4))
X

def ols(M, Y, standar=True, Pvalue=True, instrumento=None, index=None):
    """OLS estimates with standard errors and p-values; if an instrument and
    a column index are given, return OLS and IV (2SLS) estimates instead."""

    if standar and Pvalue and (instrumento is None) and (index is None):

        beta = np.linalg.inv(M.T @ M) @ (M.T @ Y)  # beta estimate
        y_est = M @ beta                           # fitted Y
        n = M.shape[0]
        k = M.shape[1]                             # number of estimated parameters
        nk = n - k                                 # degrees of freedom
        sigma = np.sum((Y - y_est) ** 2) / nk      # residual variance
        Var = sigma * np.linalg.inv(M.T @ M)
        sd = np.sqrt(np.diag(Var))                 # square root of the main diagonal of Var
        t_est = np.absolute(beta / sd)
        pvalue = (1 - t.cdf(t_est, df=nk)) * 2
        df = pd.DataFrame({"OLS": beta, "standar_error": sd,
                           "Pvalue": pvalue})

    elif (instrumento is not None) and (index is not None):

        beta = np.linalg.inv(M.T @ M) @ (M.T @ Y)

        # index is the column of the endogenous regressor in M
        # (column 0 is the constant, so x1 corresponds to index = 1).
        Z = M.copy()
        Z[:, index] = instrumento  # replace the endogenous variable by the instrument in the covariate matrix
        beta_x = np.linalg.inv(Z.T @ Z) @ (Z.T @ M[:, index])
        x_est = Z @ beta_x         # first-stage fitted values
        X_hat = M.copy()
        X_hat[:, index] = x_est    # replace the endogenous x by its fitted values
        beta_iv = np.linalg.inv(X_hat.T @ X_hat) @ (X_hat.T @ Y)
        df = pd.DataFrame({"OLS": beta, "OLS_IV": beta_iv})

    return df


ols(X, Y)

ols(X, Y, instrumento=z, index=1)
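
# Optional sanity check (not part of the original assignment): the "OLS"
# column returned above can be cross-checked against numpy's least-squares
# solver, which should give the same coefficients up to floating-point error.
beta_check, *_ = np.linalg.lstsq(X, Y, rcond=None)
print(beta_check)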

# For a sample size of 50

x1 = np.random.rand(50)         # uniform distribution [0,1]
x2 = np.random.rand(50)         # uniform distribution [0,1]
x3 = np.random.rand(50)         # uniform distribution [0,1]
x4 = np.random.rand(50)         # uniform distribution [0,1]
e = np.random.normal(0, 1, 50)  # normal distribution, mean = 0 and sd = 1
z = np.random.rand(50)

# Population regression (Data Generating Process, DGP)
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + e
X = np.column_stack((np.ones(50), x1, x2, x3, x4))
X

# Reuse the ols() function defined above.
ols(X, Y)

ols(X, Y, instrumento=z, index=1)
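
# The IV branch of ols() is two-stage least squares written by hand. A minimal
# sketch of the same computation as two explicit stages, assuming the n = 50
# data generated above with x1 (column 1 of X) endogenous and z as its instrument:
Z_stage = X.copy()
Z_stage[:, 1] = z                       # first-stage design: swap x1 for the instrument
gamma = np.linalg.inv(Z_stage.T @ Z_stage) @ (Z_stage.T @ X[:, 1])
x1_hat = Z_stage @ gamma                # first-stage fitted values of x1
X_stage = X.copy()
X_stage[:, 1] = x1_hat                  # second-stage design
beta_2sls = np.linalg.inv(X_stage.T @ X_stage) @ (X_stage.T @ Y)
print(beta_2sls)                        # should match the "OLS_IV" column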

# For a sample size of 80

x1 = np.random.rand(80)         # uniform distribution [0,1]
x2 = np.random.rand(80)         # uniform distribution [0,1]
x3 = np.random.rand(80)         # uniform distribution [0,1]
x4 = np.random.rand(80)         # uniform distribution [0,1]
e = np.random.normal(0, 1, 80)  # normal distribution, mean = 0 and sd = 1
z = np.random.rand(80)

# Population regression (Data Generating Process, DGP)
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + e
X = np.column_stack((np.ones(80), x1, x2, x3, x4))
X

ols(X, Y)

ols(X, Y, instrumento=z, index=1)

# For a sample size of 120

x1 = np.random.rand(120)         # uniform distribution [0,1]
x2 = np.random.rand(120)         # uniform distribution [0,1]
x3 = np.random.rand(120)         # uniform distribution [0,1]
x4 = np.random.rand(120)         # uniform distribution [0,1]
e = np.random.normal(0, 1, 120)  # normal distribution, mean = 0 and sd = 1
z = np.random.rand(120)

# Population regression (Data Generating Process, DGP)
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + e
X = np.column_stack((np.ones(120), x1, x2, x3, x4))
X

ols(X, Y)

ols(X, Y, instrumento=z, index=1)
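
# Side note: the two-sided p-value inside ols() is computed as
# (1 - t.cdf(|t|, df)) * 2. An equivalent form uses the survival function,
# which is more stable for large |t|. The 2.5 and 115 below are illustrative
# values only, not taken from the output above.
print(2 * t.sf(2.5, df=115))
print((1 - t.cdf(2.5, df=115)) * 2)  # same value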

# For a sample size of 200

x1 = np.random.rand(200)         # uniform distribution [0,1]
x2 = np.random.rand(200)         # uniform distribution [0,1]
x3 = np.random.rand(200)         # uniform distribution [0,1]
x4 = np.random.rand(200)         # uniform distribution [0,1]
e = np.random.normal(0, 1, 200)  # normal distribution, mean = 0 and sd = 1
z = np.random.rand(200)

# Population regression (Data Generating Process, DGP)
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + e
X = np.column_stack((np.ones(200), x1, x2, x3, x4))
X

ols(X, Y)

ols(X, Y, instrumento=z, index=1)
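
# In the just-identified case (one instrument for one endogenous regressor),
# the 2SLS estimator also has the closed form (Z'X)^{-1} Z'Y, where Z is X
# with the endogenous column replaced by the instrument. A sketch using the
# n = 200 data above (column 1 = x1, instrument z):
Z_iv = X.copy()
Z_iv[:, 1] = z
beta_iv_closed = np.linalg.inv(Z_iv.T @ X) @ (Z_iv.T @ Y)
print(beta_iv_closed)  # should match the "OLS_IV" column of ols(X, Y, instrumento=z, index=1)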

# For a sample size of 500

x1 = np.random.rand(500)         # uniform distribution [0,1]
x2 = np.random.rand(500)         # uniform distribution [0,1]
x3 = np.random.rand(500)         # uniform distribution [0,1]
x4 = np.random.rand(500)         # uniform distribution [0,1]
e = np.random.normal(0, 1, 500)  # normal distribution, mean = 0 and sd = 1
z = np.random.rand(500)

# Population regression (Data Generating Process, DGP)
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + e
X = np.column_stack((np.ones(500), x1, x2, x3, x4))
X

ols(X, Y)

ols(X, Y, instrumento=z, index=1)

# For a sample size of 800

x1 = np.random.rand(800)         # uniform distribution [0,1]
x2 = np.random.rand(800)         # uniform distribution [0,1]
x3 = np.random.rand(800)         # uniform distribution [0,1]
x4 = np.random.rand(800)         # uniform distribution [0,1]
e = np.random.normal(0, 1, 800)  # normal distribution, mean = 0 and sd = 1
z = np.random.rand(800)

# Population regression (Data Generating Process, DGP)
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + e
X = np.column_stack((np.ones(800), x1, x2, x3, x4))
X

ols(X, Y)

ols(X, Y, instrumento=z, index=1)

# For a sample size of 1000

x1 = np.random.rand(1000)         # uniform distribution [0,1]
x2 = np.random.rand(1000)         # uniform distribution [0,1]
x3 = np.random.rand(1000)         # uniform distribution [0,1]
x4 = np.random.rand(1000)         # uniform distribution [0,1]
e = np.random.normal(0, 1, 1000)  # normal distribution, mean = 0 and sd = 1
z = np.random.rand(1000)

# Population regression (Data Generating Process, DGP)
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + e
X = np.column_stack((np.ones(1000), x1, x2, x3, x4))
X

ols(X, Y)

ols(X, Y, instrumento=z, index=1)
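
# The blocks above repeat the same steps for each sample size. A more compact
# alternative (a sketch, not part of the original assignment) is to loop over
# the sample sizes and collect the estimates with the ols() function defined above:
results = {}
for n_obs in [10, 50, 80, 120, 200, 500, 800, 1000]:
    x1 = np.random.rand(n_obs)
    x2 = np.random.rand(n_obs)
    x3 = np.random.rand(n_obs)
    x4 = np.random.rand(n_obs)
    e = np.random.normal(0, 1, n_obs)
    Y_n = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + e
    X_n = np.column_stack((np.ones(n_obs), x1, x2, x3, x4))
    results[n_obs] = ols(X_n, Y_n)
print(results[1000])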

# QUESTION 4

# Generate the regressors for a sample size of 800
x1 = np.random.rand(800)         # uniform distribution [0,1]
x2 = np.random.rand(800)         # uniform distribution [0,1]
x3 = np.random.rand(800)         # uniform distribution [0,1]
x4 = np.random.rand(800)         # uniform distribution [0,1]
x5 = np.random.rand(800)         # uniform distribution [0,1]
x6 = np.random.rand(800)         # uniform distribution [0,1]
x7 = np.random.rand(800)         # uniform distribution [0,1]

e = np.random.normal(0, 1, 800)  # normal distribution, mean = 0 and sd = 1
z = np.random.rand(800)

# Population regression (Data Generating Process, DGP)
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + 1.5*x5 + 1.5*x6 + 1.5*x7 + e
X = np.column_stack((np.ones(800), x1, x2, x3, x4, x5, x6, x7))
X

def ols(M, Y, standar=True, Pvalue=True, instrumento=None, index=None):
    """OLS estimates with standard errors, p-values and 95% confidence
    intervals; with an instrument and index, also return IV (2SLS) estimates."""

    if standar and Pvalue and (instrumento is None) and (index is None):

        beta = np.linalg.inv(M.T @ M) @ (M.T @ Y)  # beta estimate
        y_est = M @ beta                           # fitted Y
        n = M.shape[0]
        k = M.shape[1]                             # number of estimated parameters
        nk = n - k                                 # degrees of freedom
        sigma = np.sum((Y - y_est) ** 2) / nk      # residual variance
        Var = sigma * np.linalg.inv(M.T @ M)
        sd = np.sqrt(np.diag(Var))                 # standard errors
        t_est = np.absolute(beta / sd)
        root = np.sum((Y - y_est) ** 2) / n        # mean squared residual (not used below)
        liminf = beta - 1.96 * sd                  # lower limit (normal approximation)
        limsup = beta + 1.96 * sd                  # upper limit (normal approximation)
        pvalue = (1 - t.cdf(t_est, df=nk)) * 2
        df = pd.DataFrame({"OLS": beta, "standar_error": sd,
                           "Pvalue": pvalue, "Lim.Inf": liminf, "Lim.Sup": limsup})

    elif (instrumento is not None) and (index is not None):

        beta = np.linalg.inv(M.T @ M) @ (M.T @ Y)

        # index is the column of the endogenous regressor in M
        # (column 0 is the constant, so x1 corresponds to index = 1).
        Z = M.copy()
        Z[:, index] = instrumento  # replace the endogenous variable by the instrument
        beta_x = np.linalg.inv(Z.T @ Z) @ (Z.T @ M[:, index])
        x_est = Z @ beta_x         # first-stage fitted values
        X_hat = M.copy()
        X_hat[:, index] = x_est    # replace the endogenous x by its fitted values
        beta_iv = np.linalg.inv(X_hat.T @ X_hat) @ (X_hat.T @ Y)
        df = pd.DataFrame({"OLS": beta, "OLS_IV": beta_iv})

    return df


ols(X, Y)
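
# A small follow-up sketch: the 1.96 used for the confidence limits above is
# the normal approximation of the 97.5% quantile. The exact Student-t critical
# value for these degrees of freedom is very close because the sample is large:
print(t.ppf(0.975, df=800 - X.shape[1]))  # roughly 1.96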