Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sagelib
Path: blob/master/sage/finance/time_series.pyx
4079 views
1
"""
2
Time Series
3
4
This is a module for working with discrete floating point time series.
5
It is designed so that every operation is very fast, typically much
6
faster than with other generic code, e.g., Python lists of doubles or
7
even NumPy arrays. The semantics of time series is more similar to
8
Python lists of doubles than Sage real double vectors or NumPy 1-D
9
arrays. In particular, time series are not endowed with much
10
algebraic structure and are always mutable.
11
12
.. NOTE::
13
14
NumPy arrays are faster at slicing, since slices return
15
references, and NumPy arrays have strides. However, this speed at
16
slicing makes NumPy slower at certain other operations.
17
18
EXAMPLES::
19
20
sage: set_random_seed(1)
21
sage: t = finance.TimeSeries([random()-0.5 for _ in xrange(10)]); t
22
[0.3294, 0.0959, -0.0706, -0.4646, 0.4311, 0.2275, -0.3840, -0.3528, -0.4119, -0.2933]
23
sage: t.sums()
24
[0.3294, 0.4253, 0.3547, -0.1099, 0.3212, 0.5487, 0.1647, -0.1882, -0.6001, -0.8933]
25
sage: t.exponential_moving_average(0.7)
26
[0.0000, 0.3294, 0.1660, 0.0003, -0.3251, 0.2042, 0.2205, -0.2027, -0.3078, -0.3807]
27
sage: t.standard_deviation()
28
0.33729638212891383
29
sage: t.mean()
30
-0.08933425506929439
31
sage: t.variance()
32
0.1137688493972542...
33
34
AUTHOR:
35
36
- William Stein
37
"""
38
39
include "../ext/cdefs.pxi"
40
include "../ext/stdsage.pxi"
41
include "../ext/python_string.pxi"
42
include "../ext/random.pxi"
43
44
cdef extern from "math.h":
45
double exp(double)
46
double floor(double)
47
double log(double)
48
double pow(double, double)
49
double sqrt(double)
50
51
cdef extern from "string.h":
52
void* memcpy(void* dst, void* src, size_t len)
53
54
cimport numpy as cnumpy
55
56
from sage.rings.integer import Integer
57
from sage.rings.real_double import RDF
58
from sage.modules.vector_real_double_dense cimport Vector_real_double_dense
59
60
max_print = 10
61
digits = 4
62
63
cdef class TimeSeries:
64
def __cinit__(self):
65
"""
66
Create new empty uninitialized time series.
67
68
EXAMPLES:
69
70
This implicitly calls new::
71
72
sage: finance.TimeSeries([1,3,-4,5])
73
[1.0000, 3.0000, -4.0000, 5.0000]
74
"""
75
self._values = NULL
76
77
def __init__(self, values, bint initialize=True):
78
"""
79
Initialize new time series.
80
81
INPUT:
82
83
- ``values`` -- integer (number of values) or an iterable of
84
floats.
85
86
- ``initialize`` -- bool (default: ``True``); if ``False``, do not
87
bother to zero out the entries of the new time series.
88
For large series that you are going to just fill in,
89
this can be way faster.
90
91
EXAMPLES:
92
93
This implicitly calls init::
94
95
sage: finance.TimeSeries([pi, 3, 18.2])
96
[3.1416, 3.0000, 18.2000]
97
98
Conversion from a NumPy 1-D array, which is very fast::
99
100
sage: v = finance.TimeSeries([1..5])
101
sage: w = v.numpy()
102
sage: finance.TimeSeries(w)
103
[1.0000, 2.0000, 3.0000, 4.0000, 5.0000]
104
105
Conversion from an n-dimensional NumPy array also works::
106
107
sage: import numpy
108
sage: v = numpy.array([[1,2], [3,4]], dtype=float); v
109
array([[ 1., 2.],
110
[ 3., 4.]])
111
sage: finance.TimeSeries(v)
112
[1.0000, 2.0000, 3.0000, 4.0000]
113
sage: finance.TimeSeries(v[:,0])
114
[1.0000, 3.0000]
115
sage: u = numpy.array([[1,2],[3,4]])
116
sage: finance.TimeSeries(u)
117
[1.0000, 2.0000, 3.0000, 4.0000]
118
119
For speed purposes we don't initialize (so value is garbage)::
120
121
sage: t = finance.TimeSeries(10, initialize=False)
122
"""
123
cdef Vector_real_double_dense z
124
cdef cnumpy.ndarray np
125
cdef double *np_data
126
cdef unsigned int j
127
if isinstance(values, (int, long, Integer)):
128
self._length = values
129
values = None
130
elif PY_TYPE_CHECK(values, Vector_real_double_dense) or PY_TYPE_CHECK(values, cnumpy.ndarray):
131
if PY_TYPE_CHECK(values, Vector_real_double_dense):
132
np = values._vector_numpy
133
else:
134
np = values
135
136
if np.ndim != 1:
137
np = np.reshape([np.size])
138
139
# Make the array be the correct type and have a C array
140
# for a data structure. If the array already is the
141
# correct type and has a C array, nothing is done, so this
142
# should be fast in the common case.
143
np = np.astype('double')
144
np = cnumpy.PyArray_GETCONTIGUOUS(np)
145
np_data = <double*> cnumpy.PyArray_DATA(np)
146
self._length = np.shape[0]
147
self._values = <double*> sage_malloc(sizeof(double) * self._length)
148
if self._values == NULL:
149
raise MemoryError
150
151
memcpy(self._values, np_data, sizeof(double)*self._length)
152
return
153
else:
154
values = [float(x) for x in values]
155
self._length = len(values)
156
157
self._values = <double*> sage_malloc(sizeof(double) * self._length)
158
if self._values == NULL:
159
raise MemoryError
160
if not initialize: return
161
cdef Py_ssize_t i
162
if values is not None:
163
for i from 0 <= i < self._length:
164
self._values[i] = values[i]
165
else:
166
for i from 0 <= i < self._length:
167
self._values[i] = 0
168
169
def __reduce__(self):
170
"""
171
Used in pickling time series.
172
173
EXAMPLES::
174
175
sage: v = finance.TimeSeries([1,-3.5])
176
sage: v.__reduce__()
177
(<built-in function unpickle_time_series_v1>, (..., 2))
178
sage: loads(dumps(v)) == v
179
True
180
181
Note that dumping and loading with compress ``False`` is much faster,
182
though dumping with compress ``True`` can save a lot of space::
183
184
sage: v = finance.TimeSeries([1..10^5])
185
sage: loads(dumps(v, compress=False),compress=False) == v
186
True
187
"""
188
buf = PyString_FromStringAndSize(<char*>self._values, self._length*sizeof(double)/sizeof(char))
189
return unpickle_time_series_v1, (buf, self._length)
190
191
def __cmp__(self, _other):
192
"""
193
Compare ``self`` and ``other``. This has the same semantics
194
as list comparison.
195
196
EXAMPLES::
197
198
sage: v = finance.TimeSeries([1,2,3]); w = finance.TimeSeries([1,2])
199
sage: v < w
200
False
201
sage: w < v
202
True
203
sage: v == v
204
True
205
sage: w == w
206
True
207
"""
208
cdef TimeSeries other
209
cdef Py_ssize_t c, i
210
cdef double d
211
if not isinstance(_other, TimeSeries):
212
_other = TimeSeries(_other)
213
other = _other
214
for i from 0 <= i < min(self._length, other._length):
215
d = self._values[i] - other._values[i]
216
if d:
217
return -1 if d < 0 else 1
218
c = self._length - other._length
219
if c < 0:
220
return -1
221
elif c > 0:
222
return 1
223
return 0
224
225
def __dealloc__(self):
226
"""
227
Free up memory used by a time series.
228
229
EXAMPLES:
230
231
This tests ``__dealloc__`` implicitly::
232
233
sage: v = finance.TimeSeries([1,3,-4,5])
234
sage: del v
235
"""
236
if self._values:
237
sage_free(self._values)
238
239
def vector(self):
240
"""
241
Return real double vector whose entries are the values of this
242
time series. This is useful since vectors have standard
243
algebraic structure and play well with matrices.
244
245
OUTPUT:
246
247
A real double vector.
248
249
EXAMPLES::
250
251
sage: v = finance.TimeSeries([1..10])
252
sage: v.vector()
253
(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)
254
"""
255
V = RDF**self._length
256
# A copy of the numpy array is made in the vector constructor
257
cdef Vector_real_double_dense x = Vector_real_double_dense(V, self.numpy(copy=False))
258
return x
259
260
def __repr__(self):
261
"""
262
Return string representation of ``self``.
263
264
EXAMPLES::
265
266
sage: v = finance.TimeSeries([1,3.1908439,-4,5.93932])
267
sage: v.__repr__()
268
'[1.0000, 3.1908, -4.0000, 5.9393]'
269
270
By default 4 digits after the decimal point are displayed. To
271
change this, change ``self.finance.time_series.digits``. ::
272
273
sage: sage.finance.time_series.digits = 2
274
sage: v.__repr__()
275
'[1.00, 3.19, -4.00, 5.94]'
276
sage: v
277
[1.00, 3.19, -4.00, 5.94]
278
sage: sage.finance.time_series.digits = 4
279
sage: v
280
[1.0000, 3.1908, -4.0000, 5.9393]
281
"""
282
return self._repr()
283
284
def _repr(self, prec=None):
285
"""
286
Print representation of a time series.
287
288
INPUT:
289
290
- ``prec`` -- (default: ``None``) number of digits of precision or
291
``None``. If ``None`` use the default
292
``sage.finance.time_series.digits``.
293
294
OUTPUT:
295
296
A string.
297
298
EXAMPLES::
299
300
sage: v = finance.TimeSeries([1,3.1908439,-4,5.93932])
301
sage: v._repr()
302
'[1.0000, 3.1908, -4.0000, 5.9393]'
303
sage: v._repr(10)
304
'[1.0000000000, 3.1908439000, -4.0000000000, 5.9393200000]'
305
sage: v._repr(2)
306
'[1.00, 3.19, -4.00, 5.94]'
307
"""
308
if prec is None: prec = digits
309
format = '%.' + str(prec) + 'f'
310
if len(self) > max_print:
311
v0 = self[:max_print//2]
312
v1 = self[-max_print//2:]
313
return '[' + ', '.join([format%x for x in v0]) + ' ... ' + \
314
', '.join([format%x for x in v1]) + ']'
315
else:
316
return '[' + ', '.join([format%x for x in self]) + ']'
317
318
def __len__(self):
319
"""
320
Return the number of entries in this time series.
321
322
OUTPUT:
323
324
Python integer.
325
326
EXAMPLES::
327
328
sage: v = finance.TimeSeries([1,3.1908439,-4,5.93932])
329
sage: v.__len__()
330
4
331
sage: len(v)
332
4
333
"""
334
return self._length
335
336
def __getitem__(self, i):
337
"""
338
Return i-th entry or slice of ``self``.
339
340
EXAMPLES::
341
342
sage: v = finance.TimeSeries([1,-4,3,-2.5,-4,3])
343
sage: v[2]
344
3.0
345
sage: v[-1]
346
3.0
347
sage: v[-10]
348
Traceback (most recent call last):
349
...
350
IndexError: TimeSeries index out of range
351
sage: v[5]
352
3.0
353
sage: v[6]
354
Traceback (most recent call last):
355
...
356
IndexError: TimeSeries index out of range
357
358
Some slice examples::
359
360
sage: v[-3:]
361
[-2.5000, -4.0000, 3.0000]
362
sage: v[-3:-1]
363
[-2.5000, -4.0000]
364
sage: v[::2]
365
[1.0000, 3.0000, -4.0000]
366
sage: v[3:20]
367
[-2.5000, -4.0000, 3.0000]
368
sage: v[3:2]
369
[]
370
371
Make a copy::
372
373
sage: v[:]
374
[1.0000, -4.0000, 3.0000, -2.5000, -4.0000, 3.0000]
375
376
Reverse the time series::
377
378
sage: v[::-1]
379
[3.0000, -4.0000, -2.5000, 3.0000, -4.0000, 1.0000]
380
"""
381
cdef Py_ssize_t start, stop, step, j
382
cdef TimeSeries t
383
if PySlice_Check(i):
384
start = 0 if (i.start is None) else i.start
385
stop = self._length if (i.stop is None) else i.stop
386
step = 1 if (i.step is None) else i.step
387
if start < 0:
388
start += self._length
389
if start < 0: start = 0
390
elif start >= self._length:
391
start = self._length - 1
392
if stop < 0:
393
stop += self._length
394
if stop < 0: stop = 0
395
elif stop > self._length:
396
stop = self._length
397
if start >= stop:
398
return new_time_series(0)
399
if step < 0:
400
step = -step
401
t = new_time_series((stop-start)/step)
402
for j from 0 <= j < (stop-start)/step:
403
t._values[j] = self._values[stop-1 - j*step]
404
elif step > 1:
405
t = new_time_series((stop-start)/step)
406
for j from 0 <= j < (stop-start)/step:
407
t._values[j] = self._values[j*step+start]
408
else:
409
t = new_time_series(stop-start)
410
memcpy(t._values, self._values + start, sizeof(double)*t._length)
411
return t
412
else:
413
j = i
414
if j < 0:
415
j += self._length
416
if j < 0:
417
raise IndexError, "TimeSeries index out of range"
418
elif j >= self._length:
419
raise IndexError, "TimeSeries index out of range"
420
return self._values[j]
421
422
def __setitem__(self, Py_ssize_t i, double x):
423
"""
424
Set the i-th entry of ``self`` to ``x``.
425
426
INPUT:
427
428
- i -- a nonnegative integer.
429
430
- x -- a float.
431
432
EXAMPLES::
433
434
sage: v = finance.TimeSeries([1,3,-4,5.93932]); v
435
[1.0000, 3.0000, -4.0000, 5.9393]
436
sage: v[0] = -5.5; v
437
[-5.5000, 3.0000, -4.0000, 5.9393]
438
sage: v[-1] = 3.2; v
439
[-5.5000, 3.0000, -4.0000, 3.2000]
440
sage: v[10]
441
Traceback (most recent call last):
442
...
443
IndexError: TimeSeries index out of range
444
sage: v[-5]
445
Traceback (most recent call last):
446
...
447
IndexError: TimeSeries index out of range
448
"""
449
if i < 0:
450
i += self._length
451
if i < 0:
452
raise IndexError, "TimeSeries index out of range"
453
elif i >= self._length:
454
raise IndexError, "TimeSeries index out of range"
455
self._values[i] = x
456
457
def __copy__(self):
458
"""
459
Return a copy of ``self``.
460
461
EXAMPLES::
462
463
sage: v = finance.TimeSeries([1,-4,3,-2.5,-4,3])
464
sage: v.__copy__()
465
[1.0000, -4.0000, 3.0000, -2.5000, -4.0000, 3.0000]
466
sage: copy(v)
467
[1.0000, -4.0000, 3.0000, -2.5000, -4.0000, 3.0000]
468
sage: copy(v) is v
469
False
470
"""
471
cdef Py_ssize_t i
472
cdef TimeSeries t = new_time_series(self._length)
473
memcpy(t._values, self._values , sizeof(double)*self._length)
474
return t
475
476
def __add__(left, right):
477
"""
478
Concatenate the time series ``self`` and ``right``.
479
480
.. NOTE::
481
482
To add a single number to the entries of a time series,
483
use the ``add_scalar`` method, and to add componentwise use
484
the ``add_entries`` method.
485
486
INPUT:
487
488
- ``right`` -- a time series.
489
490
OUTPUT:
491
492
A time series.
493
494
EXAMPLES::
495
496
sage: v = finance.TimeSeries([1,2,3]); w = finance.TimeSeries([1,2])
497
sage: v + w
498
[1.0000, 2.0000, 3.0000, 1.0000, 2.0000]
499
sage: v = finance.TimeSeries([1,2,-5]); v
500
[1.0000, 2.0000, -5.0000]
501
502
Note that both summands must be a time series::
503
504
sage: v + xrange(4)
505
Traceback (most recent call last):
506
...
507
TypeError: right operand must be a time series
508
sage: [1,5] + v
509
Traceback (most recent call last):
510
...
511
TypeError: left operand must be a time series
512
"""
513
if not isinstance(right, TimeSeries):
514
raise TypeError, "right operand must be a time series"
515
if not isinstance(left, TimeSeries):
516
raise TypeError, "left operand must be a time series"
517
cdef TimeSeries R = right
518
cdef TimeSeries L = left
519
cdef TimeSeries t = new_time_series(L._length + R._length)
520
memcpy(t._values, L._values, sizeof(double)*L._length)
521
memcpy(t._values + L._length, R._values, sizeof(double)*R._length)
522
return t
523
524
def __mul__(left, right):
525
"""
526
Multiply a time series by an integer n, which (like for lists)
527
results in the time series concatenated with itself n times.
528
529
.. NOTE::
530
531
To multiply all the entries of a time series by a single
532
scalar, use the ``scale`` method.
533
534
INPUT:
535
536
- ``left``, ``right`` -- an integer and a time series.
537
538
OUTPUT:
539
540
A time series.
541
542
EXAMPLES::
543
544
sage: v = finance.TimeSeries([1,2,-5]); v
545
[1.0000, 2.0000, -5.0000]
546
sage: v*3
547
[1.0000, 2.0000, -5.0000, 1.0000, 2.0000, -5.0000, 1.0000, 2.0000, -5.0000]
548
sage: 3*v
549
[1.0000, 2.0000, -5.0000, 1.0000, 2.0000, -5.0000, 1.0000, 2.0000, -5.0000]
550
sage: v*v
551
Traceback (most recent call last):
552
...
553
TypeError: 'sage.finance.time_series.TimeSeries' object cannot be interpreted as an index
554
"""
555
cdef Py_ssize_t n, i
556
cdef TimeSeries T
557
if isinstance(left, TimeSeries):
558
T = left
559
n = right
560
else:
561
T = right
562
n = left
563
# Make n copies of T concatenated together
564
cdef TimeSeries v = new_time_series(T._length * n)
565
for i from 0 <= i < n:
566
memcpy(v._values + i*T._length, T._values, sizeof(double)*T._length)
567
return v
568
569
570
def autoregressive_fit(self,M):
571
r"""
572
This method fits the time series to an autoregressive process
573
of order ``M``. That is, we assume the process is given by
574
`X_t-\mu=a_1(X_{t-1}-\mu)+a_2(X_{t-1}-\mu)+\cdots+a_M(X_{t-M}-\mu)+Z_t`
575
where `\mu` is the mean of the process and `Z_t` is noise.
576
This method returns estimates for `a_1,\dots,a_M`.
577
578
The method works by solving the Yule-Walker equations
579
`\Gamma a =\gamma`, where `\gamma=(\gamma(1),\dots,\gamma(M))`,
580
`a=(a_1,\dots,a_M)` with `\gamma(i)` the autocovariance of lag `i`
581
and `\Gamma_{ij}=\gamma(i-j)`.
582
583
584
.. WARNING::
585
586
The input sequence is assumed to be stationary, which
587
means that the autocovariance `\langle y_j y_k \rangle` depends
588
only on the difference `|j-k|`.
589
590
INPUT:
591
592
- ``M`` -- an integer.
593
594
OUTPUT:
595
596
A time series -- the coefficients of the autoregressive process.
597
598
EXAMPLES::
599
600
sage: set_random_seed(0)
601
sage: v = finance.TimeSeries(10^4).randomize('normal').sums()
602
sage: F = v.autoregressive_fit(100)
603
sage: v
604
[0.6767, 0.2756, 0.6332, 0.0469, -0.8897 ... 87.6759, 87.6825, 87.4120, 87.6639, 86.3194]
605
sage: v.autoregressive_forecast(F)
606
86.0177285042...
607
sage: F
608
[1.0148, -0.0029, -0.0105, 0.0067, -0.0232 ... -0.0106, -0.0068, 0.0085, -0.0131, 0.0092]
609
610
sage: set_random_seed(0)
611
sage: t=finance.TimeSeries(2000)
612
sage: z=finance.TimeSeries(2000)
613
sage: z.randomize('normal',1)
614
[1.6767, 0.5989, 1.3576, 0.4136, 0.0635 ... 1.0057, -1.1467, 1.2809, 1.5705, 1.1095]
615
sage: t[0]=1
616
sage: t[1]=2
617
sage: for i in range(2,2000):
618
... t[i]=t[i-1]-0.5*t[i-2]+z[i]
619
...
620
sage: c=t[0:-1].autoregressive_fit(2) #recovers recurrence relation
621
sage: c #should be close to [1,-0.5]
622
[1.0371, -0.5199]
623
"""
624
acvs = [self.autocovariance(i) for i in range(M+1)]
625
return autoregressive_fit(acvs)
626
627
def autoregressive_forecast(self, filter):
628
"""
629
Given the autoregression coefficients as outputted by the
630
``autoregressive_fit`` command, compute the forecast for the next
631
term in the series.
632
633
INPUT:
634
635
- ``filter`` -- a time series outputted by the ``autoregressive_fit``
636
command.
637
638
EXAMPLES::
639
640
sage: set_random_seed(0)
641
sage: v = finance.TimeSeries(100).randomize('normal').sums()
642
sage: F = v[:-1].autoregressive_fit(5); F
643
[1.0019, -0.0524, -0.0643, 0.1323, -0.0539]
644
sage: v.autoregressive_forecast(F)
645
11.7820298611...
646
sage: v
647
[0.6767, 0.2756, 0.6332, 0.0469, -0.8897 ... 9.2447, 9.6709, 10.4037, 10.4836, 12.1960]
648
"""
649
cdef TimeSeries filt
650
if isinstance(filter, TimeSeries):
651
filt = filter
652
else:
653
filt = TimeSeries(filter)
654
655
cdef double f = 0
656
cdef Py_ssize_t i
657
for i from 0 <= i < min(self._length, filt._length):
658
f += self._values[self._length - i - 1] * filt._values[i]
659
return f
660
661
def reversed(self):
662
"""
663
Return new time series obtain from this time series by
664
reversing the order of the entries in this time series.
665
666
OUTPUT:
667
668
A time series.
669
670
EXAMPLES::
671
672
sage: v = finance.TimeSeries([1..5])
673
sage: v.reversed()
674
[5.0000, 4.0000, 3.0000, 2.0000, 1.0000]
675
"""
676
cdef Py_ssize_t i, n = self._length-1
677
cdef TimeSeries t = new_time_series(self._length)
678
679
for i from 0 <= i < self._length:
680
t._values[i] = self._values[n - i]
681
return t
682
683
def extend(self, right):
684
"""
685
Extend this time series by appending elements from the iterable
686
``right``.
687
688
INPUT:
689
690
- ``right`` -- iterable that can be converted to a time series.
691
692
EXAMPLES::
693
694
sage: v = finance.TimeSeries([1,2,-5]); v
695
[1.0000, 2.0000, -5.0000]
696
sage: v.extend([-3.5, 2])
697
sage: v
698
[1.0000, 2.0000, -5.0000, -3.5000, 2.0000]
699
"""
700
if not isinstance(right, TimeSeries):
701
right = TimeSeries(right)
702
if len(right) == 0:
703
return
704
cdef TimeSeries T = right
705
cdef double* z = <double*> sage_malloc(sizeof(double)*(self._length + T._length))
706
if z == NULL:
707
raise MemoryError
708
memcpy(z, self._values, sizeof(double)*self._length)
709
memcpy(z + self._length, T._values, sizeof(double)*T._length)
710
sage_free(self._values)
711
self._values = z
712
self._length = self._length + T._length
713
714
def list(self):
715
"""
716
Return list of elements of ``self``.
717
718
EXAMPLES::
719
720
sage: v = finance.TimeSeries([1,-4,3,-2.5,-4,3])
721
sage: v.list()
722
[1.0, -4.0, 3.0, -2.5, -4.0, 3.0]
723
"""
724
cdef Py_ssize_t i
725
return [self._values[i] for i in range(self._length)]
726
727
def log(self):
728
"""
729
Return new time series got by taking the logarithms of all the
730
terms in the time series.
731
732
OUTPUT:
733
734
A new time series.
735
736
EXAMPLES:
737
738
We exponentiate then log a time series and get back
739
the original series::
740
741
sage: v = finance.TimeSeries([1,-4,3,-2.5,-4,3]); v
742
[1.0000, -4.0000, 3.0000, -2.5000, -4.0000, 3.0000]
743
sage: v.exp()
744
[2.7183, 0.0183, 20.0855, 0.0821, 0.0183, 20.0855]
745
sage: v.exp().log()
746
[1.0000, -4.0000, 3.0000, -2.5000, -4.0000, 3.0000]
747
748
Log of 0 gives ``-inf``::
749
750
sage: finance.TimeSeries([1,0,3]).log()[1]
751
-inf
752
"""
753
cdef Py_ssize_t i
754
cdef TimeSeries t = new_time_series(self._length)
755
for i from 0 <= i < self._length:
756
t._values[i] = log(self._values[i])
757
return t
758
759
def exp(self):
760
"""
761
Return new time series got by applying the exponential map to
762
all the terms in the time series.
763
764
OUTPUT:
765
766
A new time series.
767
768
EXAMPLES::
769
770
sage: v = finance.TimeSeries([1..5]); v
771
[1.0000, 2.0000, 3.0000, 4.0000, 5.0000]
772
sage: v.exp()
773
[2.7183, 7.3891, 20.0855, 54.5982, 148.4132]
774
sage: v.exp().log()
775
[1.0000, 2.0000, 3.0000, 4.0000, 5.0000]
776
"""
777
cdef Py_ssize_t i
778
cdef TimeSeries t = new_time_series(self._length)
779
for i from 0 <= i < self._length:
780
t._values[i] = exp(self._values[i])
781
return t
782
783
def abs(self):
784
"""
785
Return new time series got by replacing all entries
786
of ``self`` by their absolute value.
787
788
OUTPUT:
789
790
A new time series.
791
792
EXAMPLES::
793
794
sage: v = finance.TimeSeries([1,3.1908439,-4,5.93932])
795
sage: v
796
[1.0000, 3.1908, -4.0000, 5.9393]
797
sage: v.abs()
798
[1.0000, 3.1908, 4.0000, 5.9393]
799
"""
800
cdef Py_ssize_t i
801
cdef TimeSeries t = new_time_series(self._length)
802
for i from 0 <= i < self._length:
803
t._values[i] = self._values[i] if self._values[i] >= 0 else -self._values[i]
804
return t
805
806
def diffs(self, Py_ssize_t k = 1):
807
r"""
808
Return the new time series got by taking the differences of
809
successive terms in the time series. So if ``self`` is the time
810
series `X_0, X_1, X_2, \dots`, then this function outputs the
811
series `X_1 - X_0, X_2 - X_1, \dots`. The output series has one
812
less term than the input series. If the optional parameter
813
`k` is given, return `X_k - X_0, X_{2k} - X_k, \dots`.
814
815
INPUT:
816
817
- ``k`` -- positive integer (default: 1)
818
819
OUTPUT:
820
821
A new time series.
822
823
EXAMPLES::
824
825
sage: v = finance.TimeSeries([5,4,1.3,2,8]); v
826
[5.0000, 4.0000, 1.3000, 2.0000, 8.0000]
827
sage: v.diffs()
828
[-1.0000, -2.7000, 0.7000, 6.0000]
829
"""
830
if k != 1:
831
return self.scale_time(k).diffs()
832
cdef Py_ssize_t i
833
cdef TimeSeries t = new_time_series(self._length - 1)
834
for i from 1 <= i < self._length:
835
t._values[i-1] = self._values[i] - self._values[i-1]
836
return t
837
838
def scale_time(self, Py_ssize_t k):
839
r"""
840
Return the new time series at scale ``k``. If the input
841
time series is `X_0, X_1, X_2, \dots`, then this function
842
returns the shorter time series `X_0, X_k, X_{2k}, \dots`.
843
844
INPUT:
845
846
- ``k`` -- a positive integer.
847
848
OUTPUT:
849
850
A new time series.
851
852
EXAMPLES::
853
854
sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
855
[5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
856
sage: v.scale_time(1)
857
[5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
858
sage: v.scale_time(2)
859
[5.0000, 1.3000, 8.0000, 3.0000]
860
sage: v.scale_time(3)
861
[5.0000, 2.0000]
862
sage: v.scale_time(10)
863
[]
864
865
A series of odd length::
866
867
sage: v = finance.TimeSeries([1..5]); v
868
[1.0000, 2.0000, 3.0000, 4.0000, 5.0000]
869
sage: v.scale_time(2)
870
[1.0000, 3.0000, 5.0000]
871
872
TESTS::
873
874
sage: v.scale_time(0)
875
Traceback (most recent call last):
876
...
877
ValueError: k must be positive
878
sage: v.scale_time(-1)
879
Traceback (most recent call last):
880
...
881
ValueError: k must be positive
882
"""
883
if k <= 0:
884
raise ValueError, "k must be positive"
885
886
cdef Py_ssize_t i, n
887
n = self._length/k
888
if self._length % 2:
889
n += 1
890
cdef TimeSeries t = new_time_series(n) # in C / is floor division.
891
for i from 0 <= i < n:
892
t._values[i] = self._values[i*k]
893
return t
894
895
cpdef rescale(self, double s):
896
"""
897
Change ``self`` by multiplying every value in the series by ``s``.
898
899
INPUT:
900
901
- ``s`` -- a float.
902
903
EXAMPLES::
904
905
sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
906
[5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
907
sage: v.rescale(0.5)
908
sage: v
909
[2.5000, 2.0000, 0.6500, 1.0000, 4.0000, 5.0000, 1.5000, -2.5000]
910
"""
911
for i from 0 <= i < self._length:
912
self._values[i] = self._values[i] * s
913
914
def scale(self, double s):
915
"""
916
Return new time series obtained by multiplying every value in the
917
series by ``s``.
918
919
INPUT:
920
921
- ``s`` -- a float.
922
923
OUTPUT:
924
925
A new time series with all values multiplied by ``s``.
926
927
EXAMPLES::
928
929
sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
930
[5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
931
sage: v.scale(0.5)
932
[2.5000, 2.0000, 0.6500, 1.0000, 4.0000, 5.0000, 1.5000, -2.5000]
933
"""
934
cdef TimeSeries t = new_time_series(self._length)
935
for i from 0 <= i < self._length:
936
t._values[i] = self._values[i] * s
937
return t
938
939
def add_scalar(self, double s):
940
"""
941
Return new time series obtained by adding a scalar to every
942
value in the series.
943
944
.. NOTE::
945
946
To add componentwise, use the ``add_entries`` method.
947
948
INPUT:
949
950
- ``s`` -- a float.
951
952
OUTPUT:
953
954
A new time series with ``s`` added to all values.
955
956
EXAMPLES::
957
958
sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
959
[5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
960
sage: v.add_scalar(0.5)
961
[5.5000, 4.5000, 1.8000, 2.5000, 8.5000, 10.5000, 3.5000, -4.5000]
962
"""
963
cdef TimeSeries t = new_time_series(self._length)
964
for i from 0 <= i < self._length:
965
t._values[i] = self._values[i] + s
966
return t
967
968
def add_entries(self, t):
969
"""
970
Add corresponding entries of ``self`` and ``t`` together,
971
extending either ``self`` or ``t`` by 0's if they do
972
not have the same length.
973
974
.. NOTE::
975
976
To add a single number to the entries of a time series,
977
use the ``add_scalar`` method.
978
979
INPUT:
980
981
- ``t`` -- a time series.
982
983
OUTPUT:
984
985
A time series with length the maxima of the lengths of
986
``self`` and ``t``.
987
988
EXAMPLES::
989
990
sage: v = finance.TimeSeries([1,2,-5]); v
991
[1.0000, 2.0000, -5.0000]
992
sage: v.add_entries([3,4])
993
[4.0000, 6.0000, -5.0000]
994
sage: v.add_entries(v)
995
[2.0000, 4.0000, -10.0000]
996
sage: v.add_entries([3,4,7,18.5])
997
[4.0000, 6.0000, 2.0000, 18.5000]
998
"""
999
if not PY_TYPE_CHECK(t, TimeSeries):
1000
t = TimeSeries(t)
1001
cdef Py_ssize_t i, n
1002
cdef TimeSeries T = t, shorter, longer
1003
if self._length > T._length:
1004
shorter = T
1005
longer = self
1006
else:
1007
shorter = self
1008
longer = T
1009
1010
n = longer._length
1011
cdef TimeSeries v = new_time_series(n)
1012
1013
# Iterate up to the length of the shorter one
1014
for i from 0 <= i < shorter._length:
1015
v._values[i] = shorter._values[i] + longer._values[i]
1016
1017
# Copy over the rest of the values
1018
if shorter._length != n:
1019
memcpy(v._values + shorter._length,
1020
longer._values + shorter._length,
1021
sizeof(double)*(v._length - shorter._length))
1022
1023
return v
1024
1025
def show(self, *args, **kwds):
1026
"""
1027
Calls plot and passes all arguments onto the plot function. This is
1028
thus just an alias for plot.
1029
1030
EXAMPLES:
1031
1032
Draw a plot of a time series::
1033
1034
sage: finance.TimeSeries([1..10]).show()
1035
"""
1036
return self.plot(*args, **kwds)
1037
1038
def plot(self, Py_ssize_t plot_points=1000, points=False, **kwds):
1039
r"""
1040
Return a plot of this time series as a line or points through
1041
`(i,T(i))`, where `i` ranges over nonnegative integers up to the
1042
length of ``self``.
1043
1044
INPUT:
1045
1046
- ``plot_points`` -- (default: 1000) 0 or positive integer. Only
1047
plot the given number of equally spaced points in the time series.
1048
If 0, plot all points.
1049
1050
- ``points`` -- bool (default: ``False``). If ``True``, return just
1051
the points of the time series.
1052
1053
- ``**kwds`` -- passed to the line or point command.
1054
1055
EXAMPLES::
1056
1057
sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
1058
[5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
1059
sage: v.plot()
1060
sage: v.plot(points=True)
1061
sage: v.plot() + v.plot(points=True, rgbcolor='red')
1062
sage: v.plot() + v.plot(points=True, rgbcolor='red', pointsize=50)
1063
"""
1064
from sage.plot.all import line, point
1065
cdef Py_ssize_t s
1066
1067
if self._length < plot_points:
1068
plot_points = 0
1069
1070
if plot_points > 0:
1071
s = self._length/plot_points
1072
if plot_points <= 0:
1073
raise ValueError, "plot_points must be a positive integer"
1074
v = self.scale_time(s).list()[:plot_points]
1075
else:
1076
s = 1
1077
v = self.list()
1078
w = [(i * s, y) for i,y in enumerate(v)]
1079
if points:
1080
L = point(w, **kwds)
1081
else:
1082
L = line(w, **kwds)
1083
L.axes_range(ymin=min(v), ymax=max(v), xmin=0, xmax=len(v)*s)
1084
return L
1085
1086
def simple_moving_average(self, Py_ssize_t k):
1087
"""
1088
Return the moving average time series over the last ``k`` time units.
1089
Assumes the input time series was constant with its starting value
1090
for negative time. The t-th step of the output is the sum of
1091
the previous ``k - 1`` steps of ``self`` and the ``k``-th step
1092
divided by ``k``. Thus ``k`` values are averaged at each point.
1093
1094
INPUT:
1095
1096
- ``k`` -- positive integer.
1097
1098
OUTPUT:
1099
1100
A time series with the same number of steps as ``self``.
1101
1102
EXAMPLES::
1103
1104
sage: v = finance.TimeSeries([1,1,1,2,3]); v
1105
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1106
sage: v.simple_moving_average(0)
1107
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1108
sage: v.simple_moving_average(1)
1109
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1110
sage: v.simple_moving_average(2)
1111
[1.0000, 1.0000, 1.0000, 1.5000, 2.5000]
1112
sage: v.simple_moving_average(3)
1113
[1.0000, 1.0000, 1.0000, 1.3333, 2.0000]
1114
"""
1115
if k == 0 or k == 1:
1116
return self.__copy__()
1117
if k <= 0:
1118
raise ValueError, "k must be positive"
1119
cdef Py_ssize_t i
1120
cdef TimeSeries t = new_time_series(self._length)
1121
if self._length == 0:
1122
return t
1123
1124
cdef double s = self._values[0] * k
1125
for i from 0 <= i < self._length:
1126
if i >= k:
1127
s -= self._values[i-k]
1128
else:
1129
s -= self._values[0]
1130
# I have a slight concern about accumulated rounding error given how
1131
# this algorithm adds and subtracts.
1132
s += self._values[i]
1133
t._values[i] = s/k
1134
return t
1135
1136
def exponential_moving_average(self, double alpha):
1137
"""
1138
Return the exponential moving average time series. Assumes
1139
the input time series was constant with its starting value for
1140
negative time. The t-th step of the output is the sum of the
1141
previous k-1 steps of ``self`` and the k-th step divided by k.
1142
1143
The 0-th term is formally undefined, so we define it to be 0,
1144
and we define the first term to be ``self[0]``.
1145
1146
INPUT:
1147
1148
- ``alpha`` -- float; a smoothing factor with ``0 <= alpha <= 1``.
1149
1150
OUTPUT:
1151
1152
A time series with the same number of steps as ``self``.
1153
1154
EXAMPLES::
1155
1156
sage: v = finance.TimeSeries([1,1,1,2,3]); v
1157
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1158
sage: v.exponential_moving_average(0)
1159
[0.0000, 1.0000, 1.0000, 1.0000, 1.0000]
1160
sage: v.exponential_moving_average(1)
1161
[0.0000, 1.0000, 1.0000, 1.0000, 2.0000]
1162
sage: v.exponential_moving_average(0.5)
1163
[0.0000, 1.0000, 1.0000, 1.0000, 1.5000]
1164
1165
Some more examples::
1166
1167
sage: v = finance.TimeSeries([1,2,3,4,5])
1168
sage: v.exponential_moving_average(1)
1169
[0.0000, 1.0000, 2.0000, 3.0000, 4.0000]
1170
sage: v.exponential_moving_average(0)
1171
[0.0000, 1.0000, 1.0000, 1.0000, 1.0000]
1172
"""
1173
if alpha < 0 or alpha > 1:
1174
raise ValueError, "alpha must be between 0 and 1"
1175
cdef Py_ssize_t i
1176
cdef TimeSeries t = new_time_series(self._length)
1177
if self._length == 0:
1178
return t
1179
t._values[0] = 0
1180
if self._length == 1:
1181
return t
1182
t._values[1] = self._values[0]
1183
for i from 2 <= i < self._length:
1184
t._values[i] = alpha * self._values[i-1] + (1-alpha) *t._values[i-1]
1185
return t
1186
1187
def sums(self, double s=0):
1188
"""
1189
Return the new time series got by taking the running partial
1190
sums of the terms of this time series.
1191
1192
INPUT:
1193
1194
- ``s`` -- starting value for partial sums.
1195
1196
OUTPUT:
1197
1198
A time series.
1199
1200
EXAMPLES::
1201
1202
sage: v = finance.TimeSeries([1,1,1,2,3]); v
1203
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1204
sage: v.sums()
1205
[1.0000, 2.0000, 3.0000, 5.0000, 8.0000]
1206
"""
1207
cdef Py_ssize_t i
1208
cdef TimeSeries t = new_time_series(self._length)
1209
for i from 0 <= i < self._length:
1210
s += self._values[i]
1211
t._values[i] = s
1212
return t
1213
1214
cpdef double sum(self):
1215
"""
1216
Return the sum of all the entries of ``self``. If ``self`` has
1217
length 0, returns 0.
1218
1219
OUTPUT:
1220
1221
A double.
1222
1223
EXAMPLES::
1224
1225
sage: v = finance.TimeSeries([1,1,1,2,3]); v
1226
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1227
sage: v.sum()
1228
8.0
1229
"""
1230
cdef double s = 0
1231
cdef Py_ssize_t i
1232
for i from 0 <= i < self._length:
1233
s += self._values[i]
1234
return s
1235
1236
def prod(self):
1237
"""
1238
Return the product of all the entries of ``self``. If ``self`` has
1239
length 0, returns 1.
1240
1241
OUTPUT:
1242
1243
A double.
1244
1245
EXAMPLES::
1246
1247
sage: v = finance.TimeSeries([1,1,1,2,3]); v
1248
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1249
sage: v.prod()
1250
6.0
1251
"""
1252
cdef double s = 1
1253
cdef Py_ssize_t i
1254
for i from 0 <= i < self._length:
1255
s *= self._values[i]
1256
return s
1257
1258
1259
def mean(self):
1260
"""
1261
Return the mean (average) of the elements of ``self``.
1262
1263
OUTPUT:
1264
1265
A double.
1266
1267
EXAMPLES::
1268
1269
sage: v = finance.TimeSeries([1,1,1,2,3]); v
1270
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1271
sage: v.mean()
1272
1.6
1273
"""
1274
return self.sum() / self._length
1275
1276
def pow(self, double k):
1277
"""
1278
Return a new time series with every elements of ``self`` raised to the
1279
k-th power.
1280
1281
INPUT:
1282
1283
- ``k`` -- a float.
1284
1285
OUTPUT:
1286
1287
A time series.
1288
1289
EXAMPLES::
1290
1291
sage: v = finance.TimeSeries([1,1,1,2,3]); v
1292
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1293
sage: v.pow(2)
1294
[1.0000, 1.0000, 1.0000, 4.0000, 9.0000]
1295
"""
1296
cdef Py_ssize_t i
1297
cdef TimeSeries t = new_time_series(self._length)
1298
for i from 0 <= i < self._length:
1299
t._values[i] = pow(self._values[i], k)
1300
return t
1301
1302
def moment(self, int k):
1303
"""
1304
Return the k-th moment of ``self``, which is just the
1305
mean of the k-th powers of the elements of ``self``.
1306
1307
INPUT:
1308
1309
- ``k`` -- a positive integer.
1310
1311
OUTPUT:
1312
1313
A double.
1314
1315
EXAMPLES::
1316
1317
sage: v = finance.TimeSeries([1,1,1,2,3]); v
1318
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1319
sage: v.moment(1)
1320
1.6
1321
sage: v.moment(2)
1322
3.2
1323
"""
1324
if k <= 0:
1325
raise ValueError, "k must be positive"
1326
if k == 1:
1327
return self.mean()
1328
cdef double s = 0
1329
cdef Py_ssize_t i
1330
for i from 0 <= i < self._length:
1331
s += pow(self._values[i], k)
1332
return s / self._length
1333
1334
def central_moment(self, int k):
1335
"""
1336
Return the k-th central moment of ``self``, which is just the mean
1337
of the k-th powers of the differences ``self[i] - mu``, where ``mu`` is
1338
the mean of ``self``.
1339
1340
INPUT:
1341
1342
- ``k`` -- a positive integer.
1343
1344
OUTPUT:
1345
1346
A double.
1347
1348
EXAMPLES::
1349
1350
sage: v = finance.TimeSeries([1,2,3])
1351
sage: v.central_moment(2)
1352
0.6666666666666666
1353
1354
Note that the central moment is different from the moment
1355
here, since the mean is not `0`::
1356
1357
sage: v.moment(2) # doesn't subtract off mean
1358
4.666666666666667
1359
1360
We compute the central moment directly::
1361
1362
sage: mu = v.mean(); mu
1363
2.0
1364
sage: ((1-mu)^2 + (2-mu)^2 + (3-mu)^2) / 3
1365
0.6666666666666666
1366
"""
1367
if k == 1:
1368
return float(0)
1369
mu = self.mean()
1370
# We could make this slightly faster by doing the add scalar
1371
# and moment calculation together. But that would be nasty.
1372
return self.add_scalar(-mu).moment(k)
1373
1374
def covariance(self, TimeSeries other):
1375
r"""
1376
Return the covariance of the time series ``self`` and ``other``.
1377
1378
INPUT:
1379
1380
- ``self``, ``other`` -- time series.
1381
1382
Whichever time series has more terms is truncated.
1383
1384
EXAMPLES::
1385
1386
sage: v = finance.TimeSeries([1,-2,3]); w = finance.TimeSeries([4,5,-10])
1387
sage: v.covariance(w)
1388
-11.777777777777779
1389
"""
1390
cdef double mu = self.mean(), mu2 = other.mean()
1391
cdef double s = 0
1392
cdef Py_ssize_t i
1393
cdef Py_ssize_t n = min(self._length, other._length)
1394
for i from 0 <= i < n:
1395
s += (self._values[i] - mu)*(other._values[i] - mu2)
1396
return s / n
1397
# NOTE: There is also a formula
1398
# self._values[i]*other._values[i] - mu*mu2
1399
# but that seems less numerically stable (?), and when tested
1400
# was not noticeably faster.
1401
1402
def autocovariance(self, Py_ssize_t k=0):
1403
r"""
1404
Return the k-th autocovariance function `\gamma(k)` of ``self``.
1405
This is the covariance of ``self`` with ``self`` shifted by `k`, i.e.,
1406
1407
.. MATH::
1408
1409
\left.
1410
\left( \sum_{t=0}^{n-k-1} (x_t - \mu)(x_{t + k} - \mu) \right)
1411
\right/ n,
1412
1413
where `n` is the length of ``self``.
1414
1415
Note the denominator of `n`, which gives a "better" sample
1416
estimator.
1417
1418
INPUT:
1419
1420
- ``k`` -- a nonnegative integer (default: 0)
1421
1422
OUTPUT:
1423
1424
A float.
1425
1426
EXAMPLES::
1427
1428
sage: v = finance.TimeSeries([13,8,15,4,4,12,11,7,14,12])
1429
sage: v.autocovariance(0)
1430
14.4
1431
sage: mu = v.mean(); sum([(a-mu)^2 for a in v])/len(v)
1432
14.4
1433
sage: v.autocovariance(1)
1434
-2.7
1435
sage: mu = v.mean(); sum([(v[i]-mu)*(v[i+1]-mu) for i in range(len(v)-1)])/len(v)
1436
-2.7
1437
sage: v.autocovariance(1)
1438
-2.7
1439
1440
We illustrate with a random sample that an independently and
1441
identically distributed distribution with zero mean and
1442
variance `\sigma^2` has autocovariance function `\gamma(h)`
1443
with `\gamma(0) = \sigma^2` and `\gamma(h) = 0` for `h\neq 0`. ::
1444
1445
sage: set_random_seed(0)
1446
sage: v = finance.TimeSeries(10^6)
1447
sage: v.randomize('normal', 0, 5)
1448
[3.3835, -2.0055, 1.7882, -2.9319, -4.6827 ... -5.1868, 9.2613, 0.9274, -6.2282, -8.7652]
1449
sage: v.autocovariance(0)
1450
24.95410689...
1451
sage: v.autocovariance(1)
1452
-0.00508390...
1453
sage: v.autocovariance(2)
1454
0.022056325...
1455
sage: v.autocovariance(3)
1456
-0.01902000...
1457
"""
1458
cdef double mu = self.mean()
1459
cdef double s = 0
1460
cdef Py_ssize_t i
1461
cdef Py_ssize_t n = self._length - k
1462
for i from 0 <= i < n:
1463
s += (self._values[i] - mu)*(self._values[i+k] - mu)
1464
return s / self._length
1465
1466
def correlation(self, TimeSeries other):
1467
"""
1468
Return the correlation of ``self`` and ``other``, which is the
1469
covariance of ``self`` and ``other`` divided by the product of their
1470
standard deviation.
1471
1472
INPUT:
1473
1474
- ``self``, ``other`` -- time series.
1475
1476
Whichever time series has more terms is truncated.
1477
1478
EXAMPLES::
1479
1480
sage: v = finance.TimeSeries([1,-2,3]); w = finance.TimeSeries([4,5,-10])
1481
sage: v.correlation(w)
1482
-0.558041609...
1483
sage: v.covariance(w)/(v.standard_deviation() * w.standard_deviation())
1484
-0.558041609...
1485
"""
1486
return self.covariance(other) / (self.standard_deviation() * other.standard_deviation())
1487
1488
def autocorrelation(self, Py_ssize_t k=1):
1489
r"""
1490
Return the k-th sample autocorrelation of this time series
1491
`x_i`.
1492
1493
Let `\mu` be the sample mean. Then the sample autocorrelation
1494
function is
1495
1496
.. MATH::
1497
1498
\frac{\sum_{t=0}^{n-k-1} (x_t - \mu)(x_{t+k} - \mu) }
1499
{\sum_{t=0}^{n-1} (x_t - \mu)^2}.
1500
1501
Note that the variance must be nonzero or you will get a
1502
``ZeroDivisionError``.
1503
1504
INPUT:
1505
1506
- ``k`` -- a nonnegative integer (default: 1)
1507
1508
OUTPUT:
1509
1510
A time series.
1511
1512
EXAMPLE::
1513
1514
sage: v = finance.TimeSeries([13,8,15,4,4,12,11,7,14,12])
1515
sage: v.autocorrelation()
1516
-0.1875
1517
sage: v.autocorrelation(1)
1518
-0.1875
1519
sage: v.autocorrelation(0)
1520
1.0
1521
sage: v.autocorrelation(2)
1522
-0.20138888888888887
1523
sage: v.autocorrelation(3)
1524
0.18055555555555555
1525
1526
sage: finance.TimeSeries([1..1000]).autocorrelation()
1527
0.997
1528
"""
1529
return self.autocovariance(k) / self.variance(bias=True)
1530
1531
def variance(self, bias=False):
1532
"""
1533
Return the variance of the elements of ``self``, which is the mean
1534
of the squares of the differences from the mean.
1535
1536
INPUT:
1537
1538
- ``bias`` -- bool (default: ``False``); if ``False``, divide by
1539
``self.length() - 1`` instead of ``self.length()`` to give a less
1540
biased estimator for the variance.
1541
1542
OUTPUT:
1543
1544
A double.
1545
1546
EXAMPLE::
1547
1548
sage: v = finance.TimeSeries([1,1,1,2,3]); v
1549
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1550
sage: v.variance()
1551
0.8
1552
sage: v.variance(bias=True)
1553
0.64
1554
1555
TESTS::
1556
1557
sage: finance.TimeSeries([1]).variance()
1558
0.0
1559
sage: finance.TimeSeries([]).variance()
1560
0.0
1561
"""
1562
if self._length <= 1:
1563
return float(0)
1564
cdef double mu = self.mean()
1565
cdef double s = 0
1566
cdef double a
1567
cdef Py_ssize_t i
1568
for i from 0 <= i < self._length:
1569
a = self._values[i] - mu
1570
s += a * a
1571
if bias:
1572
return s / self._length
1573
else:
1574
return s / (self._length - 1)
1575
1576
def standard_deviation(self, bias=False):
1577
"""
1578
Return the standard deviation of the entries of ``self``.
1579
1580
INPUT:
1581
1582
- ``bias`` -- bool (default: ``False``); if ``False``, divide by
1583
``self.length() - 1`` instead of ``self.length()`` to give a less
1584
biased estimator for the variance.
1585
1586
OUTPUT:
1587
1588
A double.
1589
1590
EXAMPLES::
1591
1592
sage: v = finance.TimeSeries([1,1,1,2,3]); v
1593
[1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1594
sage: v.standard_deviation()
1595
0.8944271909...
1596
sage: v.standard_deviation(bias=True)
1597
0.8
1598
1599
TESTS::
1600
1601
sage: finance.TimeSeries([1]).standard_deviation()
1602
0.0
1603
sage: finance.TimeSeries([]).standard_deviation()
1604
0.0
1605
"""
1606
return sqrt(self.variance(bias=bias))
1607
1608
def range_statistic(self, b=None):
1609
r"""
1610
Return the rescaled range statistic `R/S` of ``self``, which is
1611
defined as follows (see Hurst 1951). If the optional
1612
parameter ``b`` is given, return the average of `R/S` range
1613
statistics of disjoint blocks of size ``b``.
1614
1615
Let `\sigma` be the standard deviation of the sequence of
1616
differences of ``self``, and let `Y_k` be the k-th term of ``self``.
1617
Let `n` be the number of terms of ``self``, and set
1618
`Z_k = Y_k - ((k+1)/n) \cdot Y_n`. Then
1619
1620
.. MATH::
1621
1622
R/S = \big( \max(Z_k) - \min(Z_k) \big) / \sigma
1623
1624
where the max and min are over all `Z_k`.
1625
Basically replacing `Y_k` by `Z_k` allows us to measure
1626
the difference from the line from the origin to `(n,Y_n)`.
1627
1628
INPUT:
1629
1630
- ``self`` -- a time series (*not* the series of differences).
1631
1632
- ``b`` -- integer (default: ``None``); if given instead divide the
1633
input time series up into ``j = floor(n/b)`` disjoint
1634
blocks, compute the `R/S` statistic for each block,
1635
and return the average of those `R/S` statistics.
1636
1637
OUTPUT:
1638
1639
A float.
1640
1641
EXAMPLES:
1642
1643
Notice that if we make a Brownian motion random walk, there
1644
is no difference if we change the standard deviation. ::
1645
1646
sage: set_random_seed(0); finance.TimeSeries(10^6).randomize('normal').sums().range_statistic()
1647
1897.8392602...
1648
sage: set_random_seed(0); finance.TimeSeries(10^6).randomize('normal',0,100).sums().range_statistic()
1649
1897.8392602...
1650
"""
1651
cdef Py_ssize_t j, k, n = self._length
1652
1653
if b is not None:
1654
j = n // b
1655
return sum([self[k*b:(k+1)*b].add_scalar(-self[k*b]).range_statistic() for k in range(j)]) / j
1656
1657
cdef double Yn = self._values[n - 1]
1658
cdef TimeSeries Z = self.__copy__()
1659
for k from 0 <= k < n:
1660
Z._values[k] -= (k+1)*Yn / n
1661
1662
sigma = self.diffs().standard_deviation()
1663
1664
return (Z.max() - Z.min()) / sigma
1665
1666
def hurst_exponent(self):
1667
"""
1668
Returns an estimate of the Hurst exponent of this time series.
1669
We use the algorithm from pages 61 -- 63 of [Peteres, Fractal
1670
Market Analysis (1994); see Google Books].
1671
1672
We define the Hurst exponent of a constant time series to be 1.
1673
1674
EXAMPLES:
1675
1676
The Hurst exponent of Brownian motion is 1/2. We approximate
1677
it with some specific samples. Note that the estimator is
1678
biased and slightly overestimates. ::
1679
1680
sage: set_random_seed(0)
1681
sage: bm = finance.TimeSeries(10^5).randomize('normal').sums(); bm
1682
[0.6767, 0.2756, 0.6332, 0.0469, -0.8897 ... 152.2437, 151.5327, 152.7629, 152.9169, 152.9084]
1683
sage: bm.hurst_exponent()
1684
0.527450972...
1685
1686
We compute the Hurst exponent of a simulated fractional Brownian
1687
motion with Hurst parameter 0.7. This function estimates the
1688
Hurst exponent as 0.706511951... ::
1689
1690
sage: set_random_seed(0)
1691
sage: fbm = finance.fractional_brownian_motion_simulation(0.7,0.1,10^5,1)[0]
1692
sage: fbm.hurst_exponent()
1693
0.706511951...
1694
1695
Another example with small Hurst exponent (notice the overestimation).
1696
1697
::
1698
1699
sage: fbm = finance.fractional_brownian_motion_simulation(0.2,0.1,10^5,1)[0]
1700
sage: fbm.hurst_exponent()
1701
0.278997441...
1702
1703
We compute the mean Hurst exponent of 100 simulated multifractal
1704
cascade random walks::
1705
1706
sage: set_random_seed(0)
1707
sage: y = finance.multifractal_cascade_random_walk_simulation(3700,0.02,0.01,0.01,1000,100)
1708
sage: finance.TimeSeries([z.hurst_exponent() for z in y]).mean()
1709
0.579848225779347...
1710
1711
We compute the mean Hurst exponent of 100 simulated Markov switching
1712
multifractal time series. The Hurst exponent is quite small. ::
1713
1714
sage: set_random_seed(0)
1715
sage: msm = finance.MarkovSwitchingMultifractal(8,1.4,0.5,0.95,3)
1716
sage: y = msm.simulations(1000,100)
1717
sage: finance.TimeSeries([z.hurst_exponent() for z in y]).mean()
1718
0.286102325623705...
1719
"""
1720
# We use disjoint blocks of size 8, 16, 32, ....
1721
cdef Py_ssize_t k = 8
1722
if self._length <= 32: # small data, estimate of Hurst will suck but...
1723
k = 4
1724
v0 = []
1725
v1 = []
1726
while k <= self._length:
1727
try:
1728
v1.append(log(self.range_statistic(k)))
1729
v0.append(log(k))
1730
except ZeroDivisionError: # 0 standard deviation
1731
pass
1732
k *= 2
1733
if len(v0) == 0:
1734
return float(1)
1735
if len(v0) == 1:
1736
return v1[0]/v0[0]
1737
import numpy
1738
coeffs = numpy.polyfit(v0,v1,1)
1739
return coeffs[0]
1740
1741
def min(self, bint index=False):
1742
"""
1743
Return the smallest value in this time series. If this series
1744
has length 0 we raise a ``ValueError``.
1745
1746
INPUT:
1747
1748
- ``index`` -- bool (default: ``False``); if ``True``, also return
1749
index of minimal entry.
1750
1751
OUTPUT:
1752
1753
- float -- smallest value.
1754
1755
- integer -- index of smallest value; only returned if ``index=True``.
1756
1757
EXAMPLES::
1758
1759
sage: v = finance.TimeSeries([1,-4,3,-2.5,-4])
1760
sage: v.min()
1761
-4.0
1762
sage: v.min(index=True)
1763
(-4.0, 1)
1764
"""
1765
if self._length == 0:
1766
raise ValueError, "min() arg is an empty sequence"
1767
cdef Py_ssize_t i, j
1768
cdef double s = self._values[0]
1769
j = 0
1770
for i from 1 <= i < self._length:
1771
if self._values[i] < s:
1772
s = self._values[i]
1773
j = i
1774
if index:
1775
return s, j
1776
else:
1777
return s
1778
1779
def max(self, bint index=False):
1780
"""
1781
Return the largest value in this time series. If this series
1782
has length 0 we raise a ``ValueError``.
1783
1784
INPUT:
1785
1786
- ``index`` -- bool (default: ``False``); if ``True``, also return
1787
index of maximum entry.
1788
1789
OUTPUT:
1790
1791
- float -- largest value.
1792
1793
- integer -- index of largest value; only returned if ``index=True``.
1794
1795
EXAMPLES::
1796
1797
sage: v = finance.TimeSeries([1,-4,3,-2.5,-4,3])
1798
sage: v.max()
1799
3.0
1800
sage: v.max(index=True)
1801
(3.0, 2)
1802
"""
1803
if self._length == 0:
1804
raise ValueError, "max() arg is an empty sequence"
1805
cdef Py_ssize_t i, j = 0
1806
cdef double s = self._values[0]
1807
for i from 1 <= i < self._length:
1808
if self._values[i] > s:
1809
s = self._values[i]
1810
j = i
1811
if index:
1812
return s, j
1813
else:
1814
return s
1815
1816
def clip_remove(self, min=None, max=None):
1817
"""
1818
Return new time series obtained from ``self`` by removing all
1819
values that are less than or equal to a certain minimum value
1820
or greater than or equal to a certain maximum.
1821
1822
INPUT:
1823
1824
- ``min`` -- (default: ``None``) ``None`` or double.
1825
1826
- ``max`` -- (default: ``None``) ``None`` or double.
1827
1828
OUTPUT:
1829
1830
A time series.
1831
1832
EXAMPLES::
1833
1834
sage: v = finance.TimeSeries([1..10])
1835
sage: v.clip_remove(3,7)
1836
[3.0000, 4.0000, 5.0000, 6.0000, 7.0000]
1837
sage: v.clip_remove(3)
1838
[3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000]
1839
sage: v.clip_remove(max=7)
1840
[1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000]
1841
"""
1842
cdef Py_ssize_t i, j
1843
cdef TimeSeries t
1844
cdef double mn, mx
1845
cdef double x
1846
if min is None and max is None:
1847
return self.copy()
1848
# This code is ugly but I think as fast as possible.
1849
if min is None and max is not None:
1850
# Return everything <= max
1851
j = 0
1852
mx = max
1853
for i from 0 <= i < self._length:
1854
if self._values[i] <= mx:
1855
j += 1
1856
1857
t = TimeSeries(j)
1858
j = 0
1859
for i from 0 <= i < self._length:
1860
if self._values[i] <= mx:
1861
t._values[j] = self._values[i]
1862
j += 1
1863
elif max is None and min is not None:
1864
# Return everything >= min
1865
j = 0
1866
mn = min
1867
for i from 0 <= i < self._length:
1868
if self._values[i] >= mn:
1869
j += 1
1870
t = TimeSeries(j)
1871
j = 0
1872
for i from 0 <= i < self._length:
1873
if self._values[i] >= mn:
1874
t._values[j] = self._values[i]
1875
j += 1
1876
else:
1877
# Return everything between min and max
1878
j = 0
1879
mn = min; mx = max
1880
for i from 0 <= i < self._length:
1881
x = self._values[i]
1882
if x >= mn and x <= mx:
1883
j += 1
1884
t = TimeSeries(j)
1885
j = 0
1886
for i from 0 <= i < self._length:
1887
x = self._values[i]
1888
if x >= mn and x <= mx:
1889
t._values[j] = x
1890
j += 1
1891
return t
1892
1893
def histogram(self, Py_ssize_t bins=50, bint normalize=False):
1894
"""
1895
Return the frequency histogram of the values in
1896
this time series divided up into the given
1897
number of bins.
1898
1899
INPUT:
1900
1901
- ``bins`` -- a positive integer (default: 50)
1902
1903
- ``normalize`` -- (default: ``False``) whether to normalize so the
1904
total area in the bars of the histogram is 1.
1905
1906
OUTPUT:
1907
1908
- counts -- list of counts of numbers of elements in
1909
each bin.
1910
1911
- endpoints -- list of 2-tuples (a,b) that give the
1912
endpoints of the bins.
1913
1914
EXAMPLES::
1915
1916
sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
1917
[5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
1918
sage: v.histogram(3)
1919
([1, 4, 3], [(-5.0, 0.0), (0.0, 5.0), (5.0, 10.0)])
1920
"""
1921
if bins <= 0:
1922
raise ValueError, "bins must be positive"
1923
1924
cdef double mn = self.min(), mx = self.max()
1925
cdef double r = mx - mn, step = r/bins
1926
cdef Py_ssize_t j
1927
1928
if r == 0:
1929
raise ValueError, "bins have 0 width"
1930
1931
v = [(mn + j*step, mn + (j+1)*step) for j in range(bins)]
1932
if self._length == 0:
1933
return [], v
1934
1935
if step == 0:
1936
counts = [0]*bins
1937
counts[0] = [self._length]
1938
return counts, v
1939
1940
cdef Py_ssize_t i
1941
cdef Py_ssize_t* cnts = <Py_ssize_t*>sage_malloc(sizeof(Py_ssize_t)*bins)
1942
if cnts == NULL:
1943
raise MemoryError
1944
for i from 0 <= i < bins:
1945
cnts[i] = 0
1946
1947
for i from 0 <= i < self._length:
1948
j = int((self._values[i] - mn)/step)
1949
if j >= bins:
1950
j = bins-1
1951
cnts[j] += 1
1952
1953
b = 1.0/(self._length * step)
1954
if normalize:
1955
counts = [cnts[i]*b for i in range(bins)]
1956
else:
1957
counts = [cnts[i] for i in range(bins)]
1958
sage_free(cnts)
1959
1960
return counts, v
1961
1962
def plot_histogram(self, bins=50, normalize=True, **kwds):
1963
"""
1964
Return histogram plot of this time series with given number of bins.
1965
1966
INPUT:
1967
1968
- ``bins`` -- positive integer (default: 50)
1969
1970
- ``normalize`` -- (default: ``True``) whether to normalize so the
1971
total area in the bars of the histogram is 1.
1972
1973
OUTPUT:
1974
1975
A histogram plot.
1976
1977
EXAMPLES::
1978
1979
sage: v = finance.TimeSeries([1..50])
1980
sage: v.plot_histogram(bins=10)
1981
"""
1982
from sage.plot.all import polygon
1983
counts, intervals = self.histogram(bins, normalize=normalize)
1984
s = 0
1985
for i, (x0,x1) in enumerate(intervals):
1986
s += polygon([(x0,0), (x0,counts[i]), (x1,counts[i]), (x1,0)], **kwds)
1987
if len(intervals) > 0:
1988
s.axes_range(ymin=0, ymax=max(counts), xmin=intervals[0][0], xmax=intervals[-1][1])
1989
return s
1990
1991
def plot_candlestick(self, int bins=30):
1992
"""
1993
Return a candlestick plot of this time series with the given number
1994
of bins.
1995
1996
A candlestick plot is a style of bar-chart used to view open, high,
1997
low, and close stock data. At each bin, the line represents the
1998
high / low range. The bar represents the open / close range. The
1999
interval is colored blue if the open for that bin is less than the
2000
close. If the close is less than the open, then that bin is colored
2001
red instead.
2002
2003
INPUT:
2004
2005
- ``bins`` -- positive integer (default: 30), the number of bins
2006
or candles.
2007
2008
OUTPUT:
2009
2010
A candlestick plot.
2011
2012
EXAMPLES:
2013
2014
Here we look at the candlestick plot for Brownian motion::
2015
2016
sage: v = finance.TimeSeries(1000).randomize()
2017
sage: v.plot_candlestick(bins=20)
2018
"""
2019
from sage.plot.all import line, polygon, Graphics
2020
2021
cdef TimeSeries t = new_time_series(self._length)
2022
cdef TimeSeries s
2023
cdef int i, j, n
2024
bin_size = int(t._length/bins)
2025
rng = t._length - bin_size
2026
2027
memcpy(t._values, self._values, sizeof(double)*self._length)
2028
p = Graphics()
2029
2030
for i from 0 <= i < bins:
2031
n = i*bin_size
2032
s = new_time_series(bin_size)
2033
for j from 0 <= j < bin_size:
2034
s._values[j] = t._values[n + j]
2035
low = s.min()
2036
high = s.max()
2037
open = s._values[0]
2038
close = s._values[bin_size-1]
2039
left = n + bin_size/3
2040
mid = n + bin_size/2
2041
right = n + 2*bin_size/3
2042
2043
rgbcolor = 'blue' if open < close else 'red'
2044
2045
p += line([(mid, low), (mid, high)], rgbcolor=rgbcolor)
2046
p += polygon([(left, open), (right, open), (right, close), (left, close)], rgbcolor=rgbcolor)
2047
2048
return p
2049
2050
def numpy(self, copy=True):
2051
"""
2052
Return a NumPy version of this time series.
2053
2054
.. NOTE::
2055
2056
If copy is ``False``, return a NumPy 1-D array reference to
2057
exactly the same block of memory as this time series. This is
2058
very, very fast and makes it easy to quickly use all
2059
NumPy/SciPy functionality on ``self``. However, it is dangerous
2060
because when this time series goes out of scope and is garbage
2061
collected, the corresponding NumPy reference object will point
2062
to garbage.
2063
2064
INPUT:
2065
2066
- ``copy`` -- bool (default: ``True``)
2067
2068
OUTPUT:
2069
2070
A numpy 1-D array.
2071
2072
EXAMPLES::
2073
2074
sage: v = finance.TimeSeries([1,-3,4.5,-2])
2075
sage: w = v.numpy(copy=False); w
2076
array([ 1. , -3. , 4.5, -2. ])
2077
sage: type(w)
2078
<type 'numpy.ndarray'>
2079
sage: w.shape
2080
(4,)
2081
2082
Notice that changing ``w`` also changes ``v`` too! ::
2083
2084
sage: w[0] = 20
2085
sage: w
2086
array([ 20. , -3. , 4.5, -2. ])
2087
sage: v
2088
[20.0000, -3.0000, 4.5000, -2.0000]
2089
2090
If you want a separate copy do not give the ``copy=False`` option. ::
2091
2092
sage: z = v.numpy(); z
2093
array([ 20. , -3. , 4.5, -2. ])
2094
sage: z[0] = -10
2095
sage: v
2096
[20.0000, -3.0000, 4.5000, -2.0000]
2097
"""
2098
cnumpy.import_array() #This must be called before using the numpy C/api or you will get segfault
2099
cdef cnumpy.npy_intp dims[1]
2100
dims[0] = self._length
2101
cdef cnumpy.ndarray n = cnumpy.PyArray_SimpleNewFromData(1, dims, cnumpy.NPY_DOUBLE, self._values)
2102
if copy:
2103
return n.copy()
2104
else:
2105
# Py_INCREF(self)
2106
# n.base = self
2107
return n
2108
2109
def randomize(self, distribution='uniform', loc=0, scale=1, **kwds):
2110
r"""
2111
Randomize the entries in this time series, and return a reference
2112
to ``self``. Thus this function both changes ``self`` in place, and
2113
returns a copy of ``self``, for convenience.
2114
2115
INPUT:
2116
2117
- ``distribution`` -- (default: ``"uniform"``); supported values are:
2118
2119
- ``'uniform'`` -- from ``loc`` to ``loc + scale``
2120
2121
- ``'normal'`` -- mean ``loc`` and standard deviation ``scale``
2122
2123
- ``'semicircle'`` -- with center at ``loc`` (``scale`` is ignored)
2124
2125
- ``'lognormal'`` -- mean ``loc`` and standard deviation ``scale``
2126
2127
- ``loc`` -- float (default: 0)
2128
2129
- ``scale`` -- float (default: 1)
2130
2131
.. NOTE::
2132
2133
All random numbers are generated using algorithms that
2134
build on the high quality GMP random number function
2135
gmp_urandomb_ui. Thus this function fully respects the Sage
2136
``set_random_state`` command. It's not quite as fast as the C
2137
library random number generator, but is of course much better
2138
quality, and is platform independent.
2139
2140
EXAMPLES:
2141
2142
We generate 5 uniform random numbers in the interval [0,1]::
2143
2144
sage: set_random_seed(0)
2145
sage: finance.TimeSeries(5).randomize()
2146
[0.8685, 0.2816, 0.0229, 0.1456, 0.7314]
2147
2148
We generate 5 uniform random numbers from 5 to `5+2=7`::
2149
2150
sage: set_random_seed(0)
2151
sage: finance.TimeSeries(10).randomize('uniform', 5, 2)
2152
[6.7369, 5.5632, 5.0459, 5.2911, 6.4628, 5.2412, 5.2010, 5.2761, 5.5813, 5.5439]
2153
2154
We generate 5 normal random values with mean 0 and variance 1. ::
2155
2156
sage: set_random_seed(0)
2157
sage: finance.TimeSeries(5).randomize('normal')
2158
[0.6767, -0.4011, 0.3576, -0.5864, -0.9365]
2159
2160
We generate 10 normal random values with mean 5 and variance 2. ::
2161
2162
sage: set_random_seed(0)
2163
sage: finance.TimeSeries(10).randomize('normal', 5, 2)
2164
[6.3534, 4.1978, 5.7153, 3.8273, 3.1269, 2.9598, 3.7484, 6.7472, 3.8986, 4.6271]
2165
2166
We generate 5 values using the semicircle distribution. ::
2167
2168
sage: set_random_seed(0)
2169
sage: finance.TimeSeries(5).randomize('semicircle')
2170
[0.7369, -0.9541, 0.4628, -0.7990, -0.4187]
2171
2172
We generate 1 million normal random values and create a frequency
2173
histogram. ::
2174
2175
sage: set_random_seed(0)
2176
sage: a = finance.TimeSeries(10^6).randomize('normal')
2177
sage: a.histogram(10)[0]
2178
[36, 1148, 19604, 130699, 340054, 347870, 137953, 21290, 1311, 35]
2179
2180
We take the above values, and compute the proportion that lie within
2181
1, 2, 3, 5, and 6 standard deviations of the mean (0)::
2182
2183
sage: s = a.standard_deviation()
2184
sage: len(a.clip_remove(-s,s))/float(len(a))
2185
0.683094
2186
sage: len(a.clip_remove(-2*s,2*s))/float(len(a))
2187
0.954559
2188
sage: len(a.clip_remove(-3*s,3*s))/float(len(a))
2189
0.997228
2190
sage: len(a.clip_remove(-5*s,5*s))/float(len(a))
2191
0.999998
2192
2193
There were no "six sigma events"::
2194
2195
sage: len(a.clip_remove(-6*s,6*s))/float(len(a))
2196
1.0
2197
"""
2198
if distribution == 'uniform':
2199
self._randomize_uniform(loc, loc + scale)
2200
elif distribution == 'normal':
2201
self._randomize_normal(loc, scale)
2202
elif distribution == 'semicircle':
2203
self._randomize_semicircle(loc)
2204
elif distribution == 'lognormal':
2205
self._randomize_lognormal(loc, scale)
2206
else:
2207
raise NotImplementedError
2208
return self
2209
2210
def _randomize_uniform(self, double left, double right):
2211
"""
2212
Generates a uniform random distribution of doubles between ``left`` and
2213
``right`` and stores values in place.
2214
2215
INPUT:
2216
2217
- ``left`` -- left bound on random distribution.
2218
2219
- ``right`` -- right bound on random distribution.
2220
2221
EXAMPLES:
2222
2223
We generate 5 values distributed with respect to the uniform
2224
distribution over the interval [0,1]::
2225
2226
sage: v = finance.TimeSeries(5)
2227
sage: set_random_seed(0)
2228
sage: v.randomize('uniform')
2229
[0.8685, 0.2816, 0.0229, 0.1456, 0.7314]
2230
2231
We now test that the mean is indeed 0.5::
2232
2233
sage: v = finance.TimeSeries(10^6)
2234
sage: set_random_seed(0)
2235
sage: v.randomize('uniform').mean()
2236
0.50069085...
2237
"""
2238
if left >= right:
2239
raise ValueError, "left must be less than right"
2240
2241
cdef randstate rstate = current_randstate()
2242
cdef Py_ssize_t k
2243
cdef double d = right - left
2244
for k from 0 <= k < self._length:
2245
self._values[k] = rstate.c_rand_double() * d + left
2246
2247
def _randomize_normal(self, double m, double s):
2248
"""
2249
Generates a normal random distribution of doubles with mean ``m`` and
2250
standard deviation ``s`` and stores values in place. Uses the
2251
Box-Muller algorithm.
2252
2253
INPUT:
2254
2255
- ``m`` -- mean
2256
2257
- ``s`` -- standard deviation
2258
2259
EXAMPLES:
2260
2261
We generate 5 values distributed with respect to the normal
2262
distribution with mean 0 and standard deviation 1::
2263
2264
sage: set_random_seed(0)
2265
sage: v = finance.TimeSeries(5)
2266
sage: v.randomize('normal')
2267
[0.6767, -0.4011, 0.3576, -0.5864, -0.9365]
2268
2269
We now test that the mean is indeed 0::
2270
2271
sage: set_random_seed(0)
2272
sage: v = finance.TimeSeries(10^6)
2273
sage: v.randomize('normal').mean()
2274
6.2705472723...
2275
2276
The same test with mean equal to 2 and standard deviation equal
2277
to 5::
2278
2279
sage: set_random_seed(0)
2280
sage: v = finance.TimeSeries(10^6)
2281
sage: v.randomize('normal', 2, 5).mean()
2282
2.0003135273...
2283
"""
2284
# Ported from http://users.tkk.fi/~nbeijar/soft/terrain/source_o2/boxmuller.c
2285
# This the box muller algorithm.
2286
cdef randstate rstate = current_randstate()
2287
2288
cdef double x1, x2, w, y1, y2
2289
cdef Py_ssize_t k
2290
for k from 0 <= k < self._length:
2291
while 1:
2292
x1 = 2*rstate.c_rand_double() - 1
2293
x2 = 2*rstate.c_rand_double() - 1
2294
w = x1*x1 + x2*x2
2295
if w < 1: break
2296
w = sqrt( (-2*log(w))/w )
2297
y1 = x1 * w
2298
y2 = x2 * w
2299
self._values[k] = m + y1*s
2300
k += 1
2301
if k < self._length:
2302
self._values[k] = m + y2*s
2303
2304
def _randomize_semicircle(self, double center):
2305
"""
2306
Generates a semicircle random distribution of doubles about center
2307
and stores values in place. Uses the acceptance-rejection method.
2308
2309
INPUT:
2310
2311
- ``center`` -- the center of the semicircle distribution.
2312
2313
EXAMPLES:
2314
2315
We generate 5 values distributed with respect to the semicircle
2316
distribution located at center::
2317
2318
sage: v = finance.TimeSeries(5)
2319
sage: set_random_seed(0)
2320
sage: v.randomize('semicircle')
2321
[0.7369, -0.9541, 0.4628, -0.7990, -0.4187]
2322
2323
We now test that the mean is indeed the center::
2324
2325
sage: v = finance.TimeSeries(10^6)
2326
sage: set_random_seed(0)
2327
sage: v.randomize('semicircle').mean()
2328
0.0007207497...
2329
2330
The same test with center equal to 2::
2331
2332
sage: v = finance.TimeSeries(10^6)
2333
sage: set_random_seed(0)
2334
sage: v.randomize('semicircle', 2).mean()
2335
2.0007207497...
2336
"""
2337
cdef Py_ssize_t k
2338
cdef double x, y, s, d = 2, left = center - 1, z
2339
cdef randstate rstate = current_randstate()
2340
z = d*d
2341
s = 1.5707963267948966192 # pi/2
2342
for k from 0 <= k < self._length:
2343
while 1:
2344
x = rstate.c_rand_double() * d - 1
2345
y = rstate.c_rand_double() * s
2346
if y*y + x*x < 1:
2347
break
2348
self._values[k] = x + center
2349
2350
def _randomize_lognormal(self, double m, double s):
2351
r"""
2352
Generates a log-normal random distribution of doubles with mean ``m``
2353
and standard deviation ``s``. Uses Box-Muller algorithm and the
2354
identity: if `Y` is a random variable with normal distribution then
2355
`X = \exp(Y)` is a random variable with log-normal distribution.
2356
2357
INPUT:
2358
2359
- ``m`` -- mean
2360
2361
- ``s`` -- standard deviation
2362
2363
EXAMPLES:
2364
2365
We generate 5 values distributed with respect to the lognormal
2366
distribution with mean 0 and standard deviation 1::
2367
2368
sage: set_random_seed(0)
2369
sage: v = finance.TimeSeries(5)
2370
sage: v.randomize('lognormal')
2371
[1.9674, 0.6696, 1.4299, 0.5563, 0.3920]
2372
2373
We now test that the mean is indeed `\sqrt{e}`::
2374
2375
sage: set_random_seed(0)
2376
sage: v = finance.TimeSeries(10^6)
2377
sage: v.randomize('lognormal').mean()
2378
1.647351973...
2379
sage: exp(0.5)
2380
1.648721270...
2381
2382
A log-normal distribution can be simply thought of as the logarithm
2383
of a normally distributed data set. We test that here by generating
2384
5 values distributed with respect to the normal distribution with mean
2385
0 and standard deviation 1::
2386
2387
sage: set_random_seed(0)
2388
sage: w = finance.TimeSeries(5)
2389
sage: w.randomize('normal')
2390
[0.6767, -0.4011, 0.3576, -0.5864, -0.9365]
2391
sage: exp(w)
2392
[1.9674, 0.6696, 1.4299, 0.5563, 0.3920]
2393
"""
2394
# Ported from http://users.tkk.fi/~nbeijar/soft/terrain/source_o2/boxmuller.c
2395
# This the box muller algorithm.
2396
cdef randstate rstate = current_randstate()
2397
cdef double x1, x2, w, y1, y2
2398
cdef Py_ssize_t k
2399
for k from 0 <= k < self._length:
2400
while 1:
2401
x1 = 2*rstate.c_rand_double() - 1
2402
x2 = 2*rstate.c_rand_double() - 1
2403
w = x1*x1 + x2*x2
2404
if w < 1: break
2405
w = sqrt( (-2*log(w))/w )
2406
y1 = x1 * w
2407
y2 = x2 * w
2408
self._values[k] = exp(m + y1*s)
2409
k += 1
2410
if k < self._length:
2411
self._values[k] = exp(m + y2*s)
2412
2413
def fft(self, bint overwrite=False):
2414
r"""
2415
Return the real discrete fast Fourier transform of ``self``, as a
2416
real time series:
2417
2418
.. MATH::
2419
2420
[y(0),\Re(y(1)),\Im(y(1)),\dots,\Re(y(n/2))] \text{ if $n$ is even}
2421
2422
[y(0),\Re(y(1)),\Im(y(1)),\dots,\Re(y(n/2)),\Im(y(n/2))] \text{ if $n$ is odd}
2423
2424
where
2425
2426
.. MATH::
2427
2428
y(j) = \sum_{k=0}^{n-1} x[k] \cdot \exp(-\sqrt{-1} \cdot jk \cdot 2\pi/n)
2429
2430
for `j = 0,\dots,n-1`. Note that `y(-j) = y(n-j)`.
2431
2432
EXAMPLES::
2433
2434
sage: v = finance.TimeSeries([1..9]); v
2435
[1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000]
2436
sage: w = v.fft(); w
2437
[45.0000, -4.5000, 12.3636, -4.5000, 5.3629, -4.5000, 2.5981, -4.5000, 0.7935]
2438
2439
We get just the series of real parts of ::
2440
2441
sage: finance.TimeSeries([w[0]]) + w[1:].scale_time(2)
2442
[45.0000, -4.5000, -4.5000, -4.5000, -4.5000]
2443
2444
An example with an even number of terms::
2445
2446
sage: v = finance.TimeSeries([1..10]); v
2447
[1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000]
2448
sage: w = v.fft(); w
2449
[55.0000, -5.0000, 15.3884, -5.0000, 6.8819, -5.0000, 3.6327, -5.0000, 1.6246, -5.0000]
2450
sage: v.fft().ifft()
2451
[1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000]
2452
"""
2453
import scipy.fftpack
2454
if overwrite:
2455
y = self
2456
else:
2457
y = self.__copy__()
2458
w = y.numpy(copy=False)
2459
scipy.fftpack.rfft(w, overwrite_x=1)
2460
return y
2461
2462
def ifft(self, bint overwrite=False):
2463
r"""
2464
Return the real discrete inverse fast Fourier transform of
2465
``self``, which is also a real time series.
2466
2467
This is the inverse of ``fft()``.
2468
2469
The returned real array contains
2470
2471
.. MATH::
2472
2473
[y(0),y(1),\dots,y(n-1)]
2474
2475
where for `n` is even
2476
2477
.. MATH::
2478
2479
y(j)
2480
=
2481
1/n \left(
2482
\sum_{k=1}^{n/2-1}
2483
(x[2k-1]+\sqrt{-1} \cdot x[2k])
2484
\cdot \exp(\sqrt{-1} \cdot jk \cdot 2pi/n)
2485
+ c.c. + x[0] + (-1)^j x[n-1]
2486
\right)
2487
2488
and for `n` is odd
2489
2490
.. MATH::
2491
2492
y(j)
2493
=
2494
1/n \left(
2495
\sum_{k=1}^{(n-1)/2}
2496
(x[2k-1]+\sqrt{-1} \cdot x[2k])
2497
\cdot \exp(\sqrt{-1} \cdot jk \cdot 2pi/n)
2498
+ c.c. + x[0]
2499
\right)
2500
2501
where `c.c.` denotes complex conjugate of preceding expression.
2502
2503
EXAMPLES::
2504
2505
sage: v = finance.TimeSeries([1..10]); v
2506
[1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000]
2507
sage: v.ifft()
2508
[5.1000, -5.6876, 1.4764, -1.0774, 0.4249, -0.1000, -0.2249, 0.6663, -1.2764, 1.6988]
2509
sage: v.ifft().fft()
2510
[1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000]
2511
"""
2512
import scipy.fftpack
2513
if overwrite:
2514
y = self
2515
else:
2516
y = self.__copy__()
2517
w = y.numpy(copy=False)
2518
scipy.fftpack.irfft(w, overwrite_x=1)
2519
return y
2520
2521
2522
cdef new_time_series(Py_ssize_t length):
2523
"""
2524
Return a new uninitialized time series of the given length.
2525
The entries of the time series are garbage.
2526
2527
INPUT:
2528
2529
- ``length`` -- integer
2530
2531
OUTPUT:
2532
2533
A time series.
2534
2535
EXAMPLES:
2536
2537
This uses ``new_time_series`` implicitly::
2538
2539
sage: v = finance.TimeSeries([1,-3,4.5,-2])
2540
sage: v.__copy__()
2541
[1.0000, -3.0000, 4.5000, -2.0000]
2542
"""
2543
if length < 0:
2544
raise ValueError, "length must be nonnegative"
2545
cdef TimeSeries t = PY_NEW(TimeSeries)
2546
t._length = length
2547
t._values = <double*> sage_malloc(sizeof(double)*length)
2548
return t
2549
2550
def unpickle_time_series_v1(v, Py_ssize_t n):
2551
"""
2552
Version 1 unpickle method.
2553
2554
INPUT:
2555
2556
- ``v`` -- a raw char buffer
2557
2558
EXAMPLES::
2559
2560
sage: v = finance.TimeSeries([1,2,3])
2561
sage: s = v.__reduce__()[1][0]
2562
sage: type(s)
2563
<type 'str'>
2564
sage: sage.finance.time_series.unpickle_time_series_v1(s,3)
2565
[1.0000, 2.0000, 3.0000]
2566
sage: sage.finance.time_series.unpickle_time_series_v1(s+s,6)
2567
[1.0000, 2.0000, 3.0000, 1.0000, 2.0000, 3.0000]
2568
sage: sage.finance.time_series.unpickle_time_series_v1('',0)
2569
[]
2570
"""
2571
cdef TimeSeries t = new_time_series(n)
2572
memcpy(t._values, PyString_AsString(v), n*sizeof(double))
2573
return t
2574
2575
2576
2577
2578
def autoregressive_fit(acvs):
2579
r"""
2580
Given a sequence of lagged autocovariances of length `M` produce
2581
`a_1,\dots,a_p` so that the first `M` autocovariance coefficients
2582
of the autoregressive processes `X_t=a_1X_{t_1}+\cdots+a_pX_{t-p}+Z_t`
2583
are the same as the input sequence.
2584
2585
The function works by solving the Yule-Walker equations
2586
`\Gamma a =\gamma`, where `\gamma=(\gamma(1),\dots,\gamma(M))`,
2587
`a=(a_1,\dots,a_M)`, with `\gamma(i)` the autocovariance of lag `i`
2588
and `\Gamma_{ij}=\gamma(i-j)`.
2589
2590
EXAMPLES:
2591
2592
In this example we consider the multifractal cascade random walk
2593
of length 1000, and use simulations to estimate the
2594
expected first few autocovariance parameters for this model, then
2595
use them to construct a linear filter that works vastly better
2596
than a linear filter constructed from the same data but not using
2597
this model. The Monte-Carlo method illustrated below should work for
2598
predicting one "time step" into the future for any
2599
model that can be simulated. To predict k time steps into the
2600
future would require using a similar technique but would require
2601
scaling time by k.
2602
2603
We create 100 simulations of a multifractal random walk. This
2604
models the logarithms of a stock price sequence. ::
2605
2606
sage: set_random_seed(0)
2607
sage: y = finance.multifractal_cascade_random_walk_simulation(3700,0.02,0.01,0.01,1000,100)
2608
2609
For each walk below we replace the walk by the walk but where each
2610
step size is replaced by its absolute value -- this is what we
2611
expect to be able to predict given the model, which is only a
2612
model for predicting volatility. We compute the first 200
2613
autocovariance values for every random walk::
2614
2615
sage: c = [[a.diffs().abs().sums().autocovariance(i) for a in y] for i in range(200)]
2616
2617
We make a time series out of the expected values of the
2618
autocovariances::
2619
2620
sage: ac = finance.TimeSeries([finance.TimeSeries(z).mean() for z in c])
2621
sage: ac
2622
[3.9962, 3.9842, 3.9722, 3.9601, 3.9481 ... 1.7144, 1.7033, 1.6922, 1.6812, 1.6701]
2623
2624
.. NOTE::
2625
2626
``ac`` looks like a line -- one could best fit it to yield a lot
2627
more approximate autocovariances.
2628
2629
We compute the autoregression coefficients matching the above
2630
autocovariances::
2631
2632
sage: F = finance.autoregressive_fit(ac); F
2633
[0.9982, -0.0002, -0.0002, 0.0003, 0.0001 ... 0.0002, -0.0002, -0.0000, -0.0002, -0.0014]
2634
2635
Note that the sum is close to 1::
2636
2637
sage: sum(F)
2638
0.99593284089454...
2639
2640
Now we make up an 'out of sample' sequence::
2641
2642
sage: y2 = finance.multifractal_cascade_random_walk_simulation(3700,0.02,0.01,0.01,1000,1)[0].diffs().abs().sums()
2643
sage: y2
2644
[0.0013, 0.0059, 0.0066, 0.0068, 0.0184 ... 6.8004, 6.8009, 6.8063, 6.8090, 6.8339]
2645
2646
And we forecast the very last value using our linear filter; the forecast
2647
is close::
2648
2649
sage: y2[:-1].autoregressive_forecast(F)
2650
6.7836741372407...
2651
2652
In fact it is closer than we would get by forecasting using a
2653
linear filter made from all the autocovariances of our sequence::
2654
2655
sage: y2[:-1].autoregressive_forecast(y2[:-1].autoregressive_fit(len(y2)))
2656
6.770168705668...
2657
2658
We record the last 20 forecasts, always using all correct values up to the
2659
one we are forecasting::
2660
2661
sage: s1 = sum([(y2[:-i].autoregressive_forecast(F)-y2[-i])^2 for i in range(1,20)])
2662
2663
We do the same, but using the autocovariances of the sample sequence::
2664
2665
sage: F2 = y2[:-100].autoregressive_fit(len(F))
2666
sage: s2 = sum([(y2[:-i].autoregressive_forecast(F2)-y2[-i])^2 for i in range(1,20)])
2667
2668
Our model gives us something that is 15 percent better in this case::
2669
2670
sage: s2/s1
2671
1.15464636102...
2672
2673
How does it compare overall? To find out we do 100 simulations
2674
and for each we compute the percent that our model beats naively
2675
using the autocovariances of the sample::
2676
2677
sage: y_out = finance.multifractal_cascade_random_walk_simulation(3700,0.02,0.01,0.01,1000,100)
2678
sage: s1 = []; s2 = []
2679
sage: for v in y_out:
2680
... s1.append(sum([(v[:-i].autoregressive_forecast(F)-v[-i])^2 for i in range(1,20)]))
2681
... F2 = v[:-len(F)].autoregressive_fit(len(F))
2682
... s2.append(sum([(v[:-i].autoregressive_forecast(F2)-v[-i])^2 for i in range(1,20)]))
2683
...
2684
2685
We find that overall the model beats naive linear forecasting by 35
2686
percent! ::
2687
2688
sage: s = finance.TimeSeries([s2[i]/s1[i] for i in range(len(s1))])
2689
sage: s.mean()
2690
1.354073591877...
2691
"""
2692
cdef TimeSeries c
2693
cdef Py_ssize_t i
2694
2695
M = len(acvs)-1
2696
2697
if M <= 0:
2698
raise ValueError, "M must be positive"
2699
2700
if not isinstance(acvs, TimeSeries):
2701
c = TimeSeries(acvs)
2702
else:
2703
c = acvs
2704
2705
# Also create the numpy vector v with entries c(1), ..., c(M).
2706
v = c[1:].numpy()
2707
2708
# 2. Create the autocovariances numpy 2d array A whose i,j entry
2709
# is c(|i-j|).
2710
import numpy
2711
A = numpy.array([[c[abs(j-k)] for j in range(M)] for k in range(M)])
2712
2713
# 3. Solve the equation A * x = v
2714
x = numpy.linalg.solve(A, v)
2715
2716
# 4. The entries of x give the linear filter coefficients.
2717
return TimeSeries(x)
2718
2719