CoCalc -- fractal.pyx

GitHub Repository: sagemath/sagelib
Path: blob/master/sage/finance/fractal.pyx
⁴⁰⁹⁴ views
1
r"""
2
Multifractal Random Walk
3

4
This module implements the fractal approach to understanding financial
5
markets that was pioneered by Mandelbrot.  In particular, it implements
6
the multifractal random walk model of asset returns as developed by
7
Bacry, Kozhemyak, and Muzy, 2006, *Continuous cascade models for asset
8
returns* and many other papers by Bacry et al. See
9
http://www.cmap.polytechnique.fr/~bacry/ftpPapers.html
10

11
See also Mandelbrot's *The Misbehavior of Markets* for a motivated
12
introduction to the general idea of using a self-similar approach to
13
modeling asset returns.
14

15
One of the main goals of this implementation is that everything is
16
highly optimized and ready for real world high performance simulation
17
work.
18

19
AUTHOR:
20

21
- William Stein (2008)
22
"""
23

24
from sage.rings.all import RDF, CDF, Integer
25
from sage.modules.all import vector
26
I = CDF.gen()
27

28
from time_series cimport TimeSeries
29

30
cdef extern from "math.h":
31
    double exp(double)
32
    double log(double)
33
    double pow(double, double)
34
    double sqrt(double)
35

36
##################################################################
37
# Simulation
38
##################################################################
39

40
def stationary_gaussian_simulation(s, N, n=1):
41
    r"""
42
    Implementation of the Davies-Harte algorithm which given an
43
    autocovariance sequence (ACVS) ``s`` and an integer ``N``, simulates ``N``
44
    steps of the corresponding stationary Gaussian process with mean
45
    0. We assume that a certain Fourier transform associated to ``s`` is
46
    nonnegative; if it isn't, this algorithm fails with a
47
    ``NotImplementedError``.
48

49
    INPUT:
50

51
    - ``s`` -- a list of real numbers that defines the ACVS.
52
      Optimally ``s`` should have length ``N+1``; if not
53
      we pad it with extra 0's until it has length ``N+1``.
54

55
    - ``N`` -- a positive integer.
56
        
57
    OUTPUT:
58

59
    A list of ``n`` time series.
60
        
61
    EXAMPLES:
62

63
    We define an autocovariance sequence::
64

65
        sage: N = 2^15
66
        sage: s = [1/math.sqrt(k+1) for k in [0..N]]
67
        sage: s[:5]
68
        [1.0, 0.7071067811865475, 0.5773502691896258, 0.5, 0.4472135954999579]
69

70
    We run the simulation::
71

72
        sage: set_random_seed(0)
73
        sage: sim = finance.stationary_gaussian_simulation(s, N)[0]
74

75
    Note that indeed the autocovariance sequence approximates ``s`` well::
76

77
        sage: [sim.autocovariance(i) for i in [0..4]]
78
        [0.98665816086255..., 0.69201577095377..., 0.56234006792017..., 0.48647965409871..., 0.43667043322102...]
79

80
    .. WARNING::
81

82
        If you were to do the above computation with a small
83
        value of ``N``, then the autocovariance sequence would not approximate
84
        ``s`` very well.
85

86
    REFERENCES:
87

88
    This is a standard algorithm that is described in several papers.
89
    It is summarized nicely with many applications at the beginning of
90
    *Simulating a Class of Stationary Gaussian Processes Using the 
91
    Davies-Harte Algorithm, with Application to Long Memory
92
    Processes*, 2000, Peter F. Craigmile, which is easily found as a
93
    free PDF via a Google search.  This paper also generalizes the
94
    algorithm to the case when all elements of ``s`` are nonpositive.
95

96
    The book *Wavelet Methods for Time Series Analysis* by Percival
97
    and Walden also describes this algorithm, but has a typo in that
98
    they put a `2\pi` instead of `\pi` a certain sum.  That book describes
99
    exactly how to use Fourier transform.  The description is in
100
    Section 7.8.  Note that these pages are missing from the Google
101
    Books version of the book, but are in the Amazon.com preview of
102
    the book.
103
    """
104
    N = Integer(N)
105
    if N < 1:
106
        raise ValueError, "N must be positive"
107

108
    if not isinstance(s, TimeSeries):
109
        s = TimeSeries(s)
110

111
    # Make sure s has length N+1.
112
    if len(s) > N + 1:
113
        s = s[:N+1]
114
    elif len(s) < N + 1:
115
        s += TimeSeries(N+1-len(s))
116

117
    # Make symmetrized vector.
118
    v = s + s[1:-1].reversed()
119

120
    # Compute its fast Fourier transform.
121
    cdef TimeSeries a
122
    a = v.fft()
123

124
    # Take the real entries in the result.
125
    a = TimeSeries([a[0]]) + a[1:].scale_time(2)
126
    
127
    # Verify the nonnegativity condition.
128
    if a.min() < 0:
129
        raise NotImplementedError, "Stationary Gaussian simulation only implemented when Fourier transform is nonnegative"
130
    
131
    sims = []
132
    cdef Py_ssize_t i, k, iN = N
133
    cdef TimeSeries y, Z
134
    cdef double temp, nn = N
135
    for i from 0 <= i < n:
136
        Z = TimeSeries(2*iN).randomize('normal')
137
        y = TimeSeries(2*iN)
138
        y._values[0] = sqrt(2*nn*a._values[0]) * Z._values[0]
139
        y._values[2*iN-1] = sqrt(2*nn*a._values[iN]) * Z._values[2*iN-1]
140
        for k from 1 <= k < iN:
141
            temp = sqrt(nn*a._values[k])
142
            y._values[2*k-1] = temp*Z._values[2*k-1]
143
            y._values[2*k] = temp*Z._values[2*k]
144
        y.ifft(overwrite=True)
145
        sims.append(y[:iN])
146

147
    return sims
148

149
def fractional_gaussian_noise_simulation(double H, double sigma2, N, n=1):
150
    r"""
151
    Return ``n`` simulations with ``N`` steps each of fractional Gaussian
152
    noise with Hurst parameter ``H`` and innovations variance ``sigma2``.
153
    
154
    INPUT:
155

156
    - ``H`` -- float; ``0 < H < 1``; the Hurst parameter.
157

158
    - ``sigma2`` - positive float; innovation variance.
159

160
    - ``N`` -- positive integer; number of steps in simulation.
161

162
    - ``n`` -- positive integer (default: 1); number of simulations.
163

164
    OUTPUT:
165

166
    List of ``n`` time series.
167

168
    EXAMPLES:
169

170
    We simulate a fractional Gaussian noise::
171

172
        sage: set_random_seed(0)
173
        sage: finance.fractional_gaussian_noise_simulation(0.8,1,10,2)
174
        [[-0.1157, 0.7025, 0.4949, 0.3324, 0.7110, 0.7248, -0.4048, 0.3103, -0.3465, 0.2964],
175
         [-0.5981, -0.6932, 0.5947, -0.9995, -0.7726, -0.9070, -1.3538, -1.2221, -0.0290, 1.0077]]
176

177
    The sums define a fractional Brownian motion process::
178

179
        sage: set_random_seed(0)
180
        sage: finance.fractional_gaussian_noise_simulation(0.8,1,10,1)[0].sums()
181
        [-0.1157, 0.5868, 1.0818, 1.4142, 2.1252, 2.8500, 2.4452, 2.7555, 2.4090, 2.7054]
182

183
    ALGORITHM:
184

185
    See *Simulating a Class of Stationary Gaussian
186
    Processes using the Davies-Harte Algorithm, with Application to
187
    Long Meoryy Processes*, 2000, Peter F. Craigmile for a discussion
188
    and references for why the algorithm we give -- which uses
189
    the ``stationary_gaussian_simulation()`` function.
190
    """
191
    if H <= 0 or H >= 1:
192
        raise ValueError, "H must satisfy 0 < H < 1"
193
    if sigma2 <= 0:
194
        raise ValueError, "sigma2 must be positive"
195
    N = Integer(N)
196
    if N < 1:
197
        raise ValueError, "N must be positive"
198
    cdef TimeSeries s = TimeSeries(N+1)
199
    s._values[0] = sigma2
200
    cdef Py_ssize_t k
201
    cdef double H2 = 2*H
202
    for k from 1 <= k <= N:
203
        s._values[k] = sigma2/2 * (pow(k+1,H2) - 2*pow(k,H2) + pow(k-1,H2))
204
    return stationary_gaussian_simulation(s, N, n)
205

206

207
def fractional_brownian_motion_simulation(double H, double sigma2, N, n=1):
208
    """
209
    Returns the partial sums of a fractional Gaussian noise simulation
210
    with the same input parameters.
211

212
    INPUT:
213

214
    - ``H`` -- float; ``0 < H < 1``; the Hurst parameter.
215

216
    - ``sigma2`` - float; innovation variance (should be close to 0).
217

218
    - ``N`` -- positive integer.
219

220
    - ``n`` -- positive integer (default: 1).
221

222
    OUTPUT:
223

224
    List of ``n`` time series.
225

226
    EXAMPLES::
227

228
        sage: set_random_seed(0)
229
        sage: finance.fractional_brownian_motion_simulation(0.8,0.1,8,1)
230
        [[-0.0754, 0.1874, 0.2735, 0.5059, 0.6824, 0.6267, 0.6465, 0.6289]]
231
        sage: set_random_seed(0)
232
        sage: finance.fractional_brownian_motion_simulation(0.8,0.01,8,1)
233
        [[-0.0239, 0.0593, 0.0865, 0.1600, 0.2158, 0.1982, 0.2044, 0.1989]]
234
        sage: finance.fractional_brownian_motion_simulation(0.8,0.01,8,2)
235
        [[-0.0167, 0.0342, 0.0261, 0.0856, 0.1735, 0.2541, 0.1409, 0.1692],
236
         [0.0244, -0.0153, 0.0125, -0.0363, 0.0764, 0.1009, 0.1598, 0.2133]]
237
    """
238
    return [a.sums() for a in fractional_gaussian_noise_simulation(H,sigma2,N,n)]
239
    
240
def multifractal_cascade_random_walk_simulation(double T,
241
                                                double lambda2,
242
                                                double ell,
243
                                                double sigma2,
244
                                                N,
245
                                                n=1):
246
    r"""
247
    Return a list of ``n`` simulations of a multifractal random walk using
248
    the log-normal cascade model of Bacry-Kozhemyak-Muzy 2008.  This
249
    walk can be interpreted as the sequence of logarithms of a price
250
    series.
251

252
    INPUT:
253

254
    - ``T`` -- positive real; the integral scale.
255

256
    - ``lambda2`` -- positive real; the intermittency coefficient.
257

258
    - ``ell`` -- a small number -- time step size.
259

260
    - ``sigma2`` -- variance of the Gaussian white noise ``eps[n]``.
261

262
    - ``N`` -- number of steps in each simulation.
263

264
    - ``n`` -- the number of separate simulations to run.
265
        
266
    OUTPUT:
267

268
    List of time series.
269

270
    EXAMPLES::
271

272
        sage: set_random_seed(0)
273
        sage: a = finance.multifractal_cascade_random_walk_simulation(3770,0.02,0.01,0.01,10,3)
274
        sage: a
275
        [[-0.0096, 0.0025, 0.0066, 0.0016, 0.0078, 0.0051, 0.0047, -0.0013, 0.0003, -0.0043],
276
         [0.0003, 0.0035, 0.0257, 0.0358, 0.0377, 0.0563, 0.0661, 0.0746, 0.0749, 0.0689],
277
         [-0.0120, -0.0116, 0.0043, 0.0078, 0.0115, 0.0018, 0.0085, 0.0005, 0.0012, 0.0060]]
278

279
    The corresponding price series::
280

281
        sage: a[0].exp()
282
        [0.9905, 1.0025, 1.0067, 1.0016, 1.0078, 1.0051, 1.0047, 0.9987, 1.0003, 0.9957]
283

284
    MORE DETAILS:
285

286
    The random walk has n-th step `\text{eps}_n e^{\omega_n}`, where
287
    `\text{eps}_n` is gaussian white noise of variance `\sigma^2` and
288
    `\omega_n` is renormalized gaussian magnitude, which is given by a
289
    stationary gaussian simulation associated to a certain autocovariance
290
    sequence.  See Bacry, Kozhemyak, Muzy, 2006,
291
    *Continuous cascade models for asset returns* for details.
292
    """
293
    if ell <= 0:
294
        raise ValueError, "ell must be positive"
295
    if T <= ell:
296
        raise ValueError, "T must be > ell"
297
    if lambda2 <= 0:
298
        raise ValueError, "lambda2 must be positive"
299
    N = Integer(N)
300
    if N < 1:
301
        raise ValueError, "N must be positive"
302

303
    # Compute the mean of the Gaussian stationary process omega.
304
    # See page 3 of Bacry, Kozhemyak, Muzy, 2008 -- "Log-Normal
305
    # Continuous Cascades..."
306
    cdef double mean = -lambda2 * (log(T/ell) + 1)
307

308
    # Compute the autocovariance sequence of the Gaussian
309
    # stationary process omega.  [Loc. cit.]  We have
310
    # tau = ell*i in that formula (6). 
311
    cdef TimeSeries s = TimeSeries(N+1)
312
    s._values[0] = lambda2*(log(T/ell) + 1)
313
    cdef Py_ssize_t i
314
    for i from 1 <= i < N+1:
315
        if ell*i >= T:
316
            s._values[i] = lambda2 * log(T/(ell*i))
317
        else:
318
            break  # all covariance numbers after this point are 0
319

320
    # Compute n simulations of omega, but with mean 0. 
321
    omega = stationary_gaussian_simulation(s, N+1, n)
322

323
    # Increase each by the given mean.
324
    omega = [t.add_scalar(mean) for t in omega]
325

326
    # For each simulation, create corresponding multifractal
327
    # random walk, as explained on page 6 of [loc. cit.]
328
    sims = []
329
    cdef Py_ssize_t k
330
    cdef TimeSeries eps, steps, om
331
    for om in omega:
332
        # First compute N Gaussian white noise steps
333
        eps = TimeSeries(N).randomize('normal', 0, sigma2)
334

335
        # Compute the steps of the multifractal random walk.
336
        steps = TimeSeries(N)
337
        for k from 0 <= k < N:
338
            steps._values[k] = eps._values[k] * exp(om._values[k])
339

340
        sims.append(steps.sums())
341
        
342
    return sims
343
        
344
    
345

346
    
347
    
348

349

350
## ##################################################################
351
## # Forecasting
352
## ##################################################################
353
## def multifractal_cascade_linear_filter(double T,  double lambda2,
354
##                    double sigma, double ell, Py_ssize_t M):
355
##     """
356
##     NOTE: I CANT GET ANALYTIC PARAMETERS RIGHT -- SWITCH TO MONTE CARLO...
357
    
358
##     Create the linear filter for the multifractal cascade with M steps
359
##     with given multifractal parameters.  Use this to predict the
360
##     squares of log price returns, i.e., the volatility.
361
    
362
##     INPUT:
363
##         T       -- positive real; the integral scale
364
##         lambda2 -- positive real; the intermittency coefficient
365
##         sigma   -- scaling parameter
366
##         ell     -- a small number -- time step size.
367
##         M       -- number of steps in linear filter
368

369
##     OUTPUT:
370
##         a time series, which should be given as the input
371
##         to the linear_forecast function.
372

373
##     REFERENCES:  See Section 13.6 "Linear Prediction and Linear Predictive Coding"
374
##     from Numerical Recipes in C for computing linear prediction
375
##     filters.  See the top of page 3 -- equation 39 -- of Bacry,
376
##     Kozhemyak, Muzy, 2006, Continuous cascade models for asset returns
377
##     for the specific analytic formulas we use of the theoretical
378
##     autocovariance.
379

380
##     ALGORITHM: The runtime is dominated by solving a numerical linear
381
##     equation involving an M x M matrix. Thus the filter for M up to
382
##     5000 can reasonably be done in less than a minute.
383

384
##     EXAMPLES:
385
##     We compute a linear filter with just a few terms:
386
##         sage: L = finance.multifractal_cascade_linear_filter(1000, 0.02, 2, 0.01, 5); L
387
##         [0.6453, 0.0628, 0.0822, 0.0570, 0.0959]
388

389
##     Note that the sum as we increase the length of the filter goes to 1:
390
##         sage: sum(L)
391
##         0.94318436639725745
392
##         sage: L = finance.multifractal_cascade_linear_filter(1000, 0.02, 2, 0.01, 1000); L
393
##         [0.6014, 0.0408, 0.0576, 0.0323, 0.0241 ... 0.0001, 0.0001, 0.0002, 0.0002, 0.0005]
394
##         sage: sum(L)
395
##         0.99500051396305267
396

397
##     We create a random multifractal walk:
398
##         sage: set_random_seed(0)
399
##         sage: y = finance.multifractal_cascade_random_walk_simulation(3770,0.02,0.01,2000,1)[0]; y
400
##         [0.0002, -0.0035, -0.0107, 0.0234, 0.0181 ... 0.2526, 0.2427, 0.2508, 0.2530, 0.2530]
401

402
##     We then square each term in the series. 
403
##         sage: y2 = y.pow(2)
404
##         sage: y2
405
##         [0.0000, 0.0000, 0.0001, 0.0005, 0.0003 ... 0.0638, 0.0589, 0.0629, 0.0640, 0.0640]
406
##         sage: y2[:-1].linear_forecast(L)
407
##         0.064481359234932076
408
##         sage: y2[:-2].linear_forecast(L)
409
##         0.063950875470110996
410
    
411
##     """
412
##     cdef TimeSeries c
413
##     cdef Py_ssize_t i
414

415
##     if M <= 0:
416
##         raise ValueError, "M must be positive"
417

418
##     # 1. Compute the autocovariance sequence
419
##     c = TimeSeries(M+1)
420

421
##     for i from 1 <= i <= M:
422
##         c._values[i] = lambda2 * log(T/i)
423
##     print c
424

425
##     # cdef double s4 = pow(sigma,4)
426
##     # cdef double ell2 = ell*ell
427
##     # cdef e = -4*lambda2
428
##     #for i from 1 <= i <= M:
429
##     #   c(i) = sigma^4 * ell^2 * (i/T)^(-4*lambda^2)
430
##     #    c._values[i] = s4 * ell2 * pow(i/T, e)
431

432
##     #cdef double K, tau
433
##     #for i from 0 <= i <= M:
434
##     #    tau = ell*i
435
##     #    K = s4 * pow(T,4*lambda2) / ((1 + e) * (2 + e))
436
##     #    c._values[i] = K * (pow(abs(ell + tau), 2+e) + pow(abs(ell-tau), 2+e) - 2*pow(abs(tau),2+e))
437

438
##     # Also create the numpy vector v with entries c(1), ..., c(M).
439
##     v = c[1:].numpy()
440
    
441
##     # 2. Create the autocovariances numpy 2d array A whose i,j entry
442
##     # is c(|i-j|).
443
##     import numpy
444
##     A = numpy.array([[c[abs(j-k)] for j in range(M)] for k in range(M)])
445
    
446
##     # 3. Solve the equation A * x = v
447
##     x = numpy.linalg.solve(A, v)
448

449
##     # 4. The entries of x give the linear filter coefficients.
450
##     return TimeSeries(x)
451

452

453

454
##################################################################
455
# Parameter Estimation
456
##################################################################
457

458
Product

Resources

Company