CoCalc -- time_series.pyx

GitHub Repository: sagemath/sagelib
Path: blob/master/sage/finance/time_series.pyx
⁴⁰⁷⁹ views
1
"""
2
Time Series
3

4
This is a module for working with discrete floating point time series.
5
It is designed so that every operation is very fast, typically much
6
faster than with other generic code, e.g., Python lists of doubles or
7
even NumPy arrays.  The semantics of time series is more similar to
8
Python lists of doubles than Sage real double vectors or NumPy 1-D
9
arrays.   In particular, time series are not endowed with much
10
algebraic structure and are always mutable. 
11

12
.. NOTE::
13

14
    NumPy arrays are faster at slicing, since slices return
15
    references, and NumPy arrays have strides.  However, this speed at
16
    slicing makes NumPy slower at certain other operations.
17

18
EXAMPLES::
19

20
    sage: set_random_seed(1)
21
    sage: t = finance.TimeSeries([random()-0.5 for _ in xrange(10)]); t
22
    [0.3294, 0.0959, -0.0706, -0.4646, 0.4311, 0.2275, -0.3840, -0.3528, -0.4119, -0.2933]
23
    sage: t.sums()
24
    [0.3294, 0.4253, 0.3547, -0.1099, 0.3212, 0.5487, 0.1647, -0.1882, -0.6001, -0.8933]
25
    sage: t.exponential_moving_average(0.7)
26
    [0.0000, 0.3294, 0.1660, 0.0003, -0.3251, 0.2042, 0.2205, -0.2027, -0.3078, -0.3807]
27
    sage: t.standard_deviation()
28
    0.33729638212891383
29
    sage: t.mean()
30
    -0.08933425506929439
31
    sage: t.variance()
32
    0.1137688493972542...
33

34
AUTHOR:
35

36
- William Stein
37
"""
38

39
include "../ext/cdefs.pxi"
40
include "../ext/stdsage.pxi"
41
include "../ext/python_string.pxi"
42
include "../ext/random.pxi"
43

44
cdef extern from "math.h":
45
    double exp(double)
46
    double floor(double)
47
    double log(double)
48
    double pow(double, double)
49
    double sqrt(double)
50

51
cdef extern from "string.h":
52
    void* memcpy(void* dst, void* src, size_t len)
53

54
cimport numpy as cnumpy
55

56
from sage.rings.integer import Integer
57
from sage.rings.real_double import RDF
58
from sage.modules.vector_real_double_dense cimport Vector_real_double_dense
59

60
max_print = 10
61
digits = 4
62
    
63
cdef class TimeSeries:
64
    def __cinit__(self):
65
        """
66
        Create new empty uninitialized time series.
67

68
        EXAMPLES:
69

70
        This implicitly calls new::
71

72
            sage: finance.TimeSeries([1,3,-4,5])
73
            [1.0000, 3.0000, -4.0000, 5.0000]
74
        """
75
        self._values = NULL
76
        
77
    def __init__(self, values, bint initialize=True):
78
        """
79
        Initialize new time series.
80
        
81
        INPUT:
82

83
        - ``values`` -- integer (number of values) or an iterable of
84
          floats.
85
            
86
        - ``initialize`` -- bool (default: ``True``); if ``False``, do not
87
          bother to zero out the entries of the new time series.
88
          For large series that you are going to just fill in,
89
          this can be way faster.
90

91
        EXAMPLES:
92

93
        This implicitly calls init::
94
        
95
            sage: finance.TimeSeries([pi, 3, 18.2])
96
            [3.1416, 3.0000, 18.2000]
97

98
        Conversion from a NumPy 1-D array, which is very fast::
99
        
100
            sage: v = finance.TimeSeries([1..5])
101
            sage: w = v.numpy()
102
            sage: finance.TimeSeries(w)
103
            [1.0000, 2.0000, 3.0000, 4.0000, 5.0000]
104

105
        Conversion from an n-dimensional NumPy array also works::
106
        
107
            sage: import numpy
108
            sage: v = numpy.array([[1,2], [3,4]], dtype=float); v
109
            array([[ 1.,  2.],
110
                   [ 3.,  4.]])
111
            sage: finance.TimeSeries(v)
112
            [1.0000, 2.0000, 3.0000, 4.0000]
113
            sage: finance.TimeSeries(v[:,0])
114
            [1.0000, 3.0000]
115
            sage: u = numpy.array([[1,2],[3,4]])
116
            sage: finance.TimeSeries(u)
117
            [1.0000, 2.0000, 3.0000, 4.0000]
118

119
        For speed purposes we don't initialize (so value is garbage)::
120

121
            sage: t = finance.TimeSeries(10, initialize=False)        
122
        """
123
        cdef Vector_real_double_dense z
124
        cdef cnumpy.ndarray np
125
        cdef double *np_data
126
        cdef unsigned int j
127
        if isinstance(values, (int, long, Integer)):
128
            self._length = values
129
            values = None
130
        elif PY_TYPE_CHECK(values, Vector_real_double_dense) or PY_TYPE_CHECK(values, cnumpy.ndarray):
131
            if PY_TYPE_CHECK(values, Vector_real_double_dense):
132
                np  = values._vector_numpy
133
            else:
134
                np = values
135

136
            if np.ndim != 1:
137
                np = np.reshape([np.size])
138

139
            # Make the array be the correct type and have a C array
140
            # for a data structure.  If the array already is the
141
            # correct type and has a C array, nothing is done, so this
142
            # should be fast in the common case.
143
            np = np.astype('double')
144
            np = cnumpy.PyArray_GETCONTIGUOUS(np)
145
            np_data = <double*> cnumpy.PyArray_DATA(np)
146
            self._length = np.shape[0]
147
            self._values = <double*> sage_malloc(sizeof(double) * self._length)
148
            if self._values == NULL:
149
                raise MemoryError
150

151
            memcpy(self._values, np_data, sizeof(double)*self._length)
152
            return
153
        else:
154
            values = [float(x) for x in values]
155
            self._length = len(values)
156
            
157
        self._values = <double*> sage_malloc(sizeof(double) * self._length)
158
        if self._values == NULL:
159
            raise MemoryError
160
        if not initialize: return
161
        cdef Py_ssize_t i
162
        if values is not None:
163
            for i from 0 <= i < self._length:
164
                self._values[i] = values[i]
165
        else:
166
            for i from 0 <= i < self._length:
167
                self._values[i] = 0
168

169
    def __reduce__(self):
170
        """
171
        Used in pickling time series.
172

173
        EXAMPLES::
174

175
            sage: v = finance.TimeSeries([1,-3.5])
176
            sage: v.__reduce__()
177
            (<built-in function unpickle_time_series_v1>, (..., 2))
178
            sage: loads(dumps(v)) == v
179
            True
180

181
        Note that dumping and loading with compress ``False`` is much faster,
182
        though dumping with compress ``True`` can save a lot of space::
183

184
            sage: v = finance.TimeSeries([1..10^5])
185
            sage: loads(dumps(v, compress=False),compress=False) == v
186
            True
187
        """
188
        buf = PyString_FromStringAndSize(<char*>self._values, self._length*sizeof(double)/sizeof(char))
189
        return unpickle_time_series_v1, (buf, self._length)
190

191
    def __cmp__(self, _other):
192
        """
193
        Compare ``self`` and ``other``.  This has the same semantics
194
        as list comparison.
195

196
        EXAMPLES::
197

198
            sage: v = finance.TimeSeries([1,2,3]); w = finance.TimeSeries([1,2])
199
            sage: v < w
200
            False
201
            sage: w < v
202
            True
203
            sage: v == v
204
            True
205
            sage: w == w
206
            True
207
        """
208
        cdef TimeSeries other
209
        cdef Py_ssize_t c, i
210
        cdef double d
211
        if not isinstance(_other, TimeSeries):
212
            _other = TimeSeries(_other)
213
        other = _other
214
        for i from 0 <= i < min(self._length, other._length):
215
            d = self._values[i] - other._values[i]
216
            if d:
217
                return -1 if d < 0 else 1
218
        c = self._length - other._length
219
        if c < 0:
220
            return -1
221
        elif c > 0:
222
            return 1
223
        return 0
224

225
    def  __dealloc__(self):
226
        """
227
        Free up memory used by a time series.
228

229
        EXAMPLES:
230

231
        This tests ``__dealloc__`` implicitly::
232

233
            sage: v = finance.TimeSeries([1,3,-4,5])
234
            sage: del v
235
        """
236
        if self._values:
237
            sage_free(self._values)
238

239
    def vector(self):
240
        """
241
        Return real double vector whose entries are the values of this
242
        time series.  This is useful since vectors have standard
243
        algebraic structure and play well with matrices.
244

245
        OUTPUT:
246

247
        A real double vector.
248

249
        EXAMPLES::
250

251
            sage: v = finance.TimeSeries([1..10])
252
            sage: v.vector()
253
            (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)
254
        """
255
        V = RDF**self._length
256
        # A copy of the numpy array is made in the vector constructor
257
        cdef Vector_real_double_dense x = Vector_real_double_dense(V, self.numpy(copy=False))
258
        return x
259

260
    def __repr__(self):
261
        """
262
        Return string representation of ``self``.
263

264
        EXAMPLES::
265

266
            sage: v = finance.TimeSeries([1,3.1908439,-4,5.93932])
267
            sage: v.__repr__()
268
            '[1.0000, 3.1908, -4.0000, 5.9393]'
269
        
270
        By default 4 digits after the decimal point are displayed.  To
271
        change this, change ``self.finance.time_series.digits``. ::
272

273
            sage: sage.finance.time_series.digits = 2
274
            sage: v.__repr__()
275
            '[1.00, 3.19, -4.00, 5.94]'
276
            sage: v
277
            [1.00, 3.19, -4.00, 5.94]
278
            sage: sage.finance.time_series.digits = 4
279
            sage: v
280
            [1.0000, 3.1908, -4.0000, 5.9393]
281
        """
282
        return self._repr()
283
    
284
    def _repr(self, prec=None):
285
        """
286
        Print representation of a time series.
287

288
        INPUT:
289

290
        - ``prec`` -- (default: ``None``) number of digits of precision or
291
          ``None``. If ``None`` use the default
292
          ``sage.finance.time_series.digits``.
293

294
        OUTPUT:
295

296
        A string.
297
                     
298
        EXAMPLES::
299

300
            sage: v = finance.TimeSeries([1,3.1908439,-4,5.93932])
301
            sage: v._repr()
302
            '[1.0000, 3.1908, -4.0000, 5.9393]'
303
            sage: v._repr(10)
304
            '[1.0000000000, 3.1908439000, -4.0000000000, 5.9393200000]'
305
            sage: v._repr(2)
306
            '[1.00, 3.19, -4.00, 5.94]'
307
        """
308
        if prec is None: prec = digits
309
        format = '%.' + str(prec) + 'f'
310
        if len(self) > max_print:
311
            v0 = self[:max_print//2]
312
            v1 = self[-max_print//2:]
313
            return '[' + ', '.join([format%x for x in v0]) + ' ... ' + \
314
                         ', '.join([format%x for x in v1]) + ']'
315
        else:
316
            return '[' + ', '.join([format%x for x in self]) + ']'
317

318
    def __len__(self):
319
        """
320
        Return the number of entries in this time series.
321

322
        OUTPUT:
323

324
        Python integer.
325

326
        EXAMPLES::
327

328
            sage: v = finance.TimeSeries([1,3.1908439,-4,5.93932])
329
            sage: v.__len__()
330
            4
331
            sage: len(v)
332
            4
333
        """
334
        return self._length
335

336
    def __getitem__(self, i):
337
        """
338
        Return i-th entry or slice of ``self``.
339
        
340
        EXAMPLES::
341

342
            sage: v = finance.TimeSeries([1,-4,3,-2.5,-4,3])
343
            sage: v[2]
344
            3.0
345
            sage: v[-1]
346
            3.0
347
            sage: v[-10]
348
            Traceback (most recent call last):
349
            ...
350
            IndexError: TimeSeries index out of range
351
            sage: v[5]
352
            3.0
353
            sage: v[6]
354
            Traceback (most recent call last):
355
            ...
356
            IndexError: TimeSeries index out of range
357

358
        Some slice examples::
359

360
            sage: v[-3:]
361
            [-2.5000, -4.0000, 3.0000]
362
            sage: v[-3:-1]
363
            [-2.5000, -4.0000]
364
            sage: v[::2]
365
            [1.0000, 3.0000, -4.0000]
366
            sage: v[3:20]
367
            [-2.5000, -4.0000, 3.0000]
368
            sage: v[3:2]
369
            []
370

371
        Make a copy::
372

373
            sage: v[:]
374
            [1.0000, -4.0000, 3.0000, -2.5000, -4.0000, 3.0000]
375

376
        Reverse the time series::
377

378
            sage: v[::-1]
379
            [3.0000, -4.0000, -2.5000, 3.0000, -4.0000, 1.0000]
380
        """
381
        cdef Py_ssize_t start, stop, step, j
382
        cdef TimeSeries t
383
        if PySlice_Check(i):
384
            start = 0 if (i.start is None) else i.start
385
            stop = self._length if (i.stop is None) else i.stop
386
            step = 1 if (i.step is None) else i.step
387
            if start < 0:
388
                start += self._length
389
                if start < 0: start = 0
390
            elif start >= self._length:
391
                start = self._length - 1
392
            if stop < 0:
393
                stop += self._length
394
                if stop < 0: stop = 0
395
            elif stop > self._length:
396
                stop = self._length 
397
            if start >= stop:
398
                return new_time_series(0)
399
            if step < 0:
400
                step = -step
401
                t = new_time_series((stop-start)/step)
402
                for j from 0 <= j < (stop-start)/step:
403
                    t._values[j] = self._values[stop-1 - j*step]
404
            elif step > 1:
405
                t = new_time_series((stop-start)/step)
406
                for j from 0 <= j < (stop-start)/step:
407
                    t._values[j] = self._values[j*step+start]
408
            else:
409
                t = new_time_series(stop-start)
410
                memcpy(t._values, self._values + start, sizeof(double)*t._length)
411
            return t
412
        else:
413
            j = i
414
            if j < 0:
415
                j += self._length
416
                if j < 0:
417
                    raise IndexError, "TimeSeries index out of range"
418
            elif j >= self._length:
419
                raise IndexError, "TimeSeries index out of range"
420
            return self._values[j]
421

422
    def __setitem__(self, Py_ssize_t i, double x):
423
        """
424
        Set the i-th entry of ``self`` to ``x``.
425

426
        INPUT:
427

428
        - i -- a nonnegative integer.
429

430
        - x -- a float.
431

432
        EXAMPLES::
433

434
            sage: v = finance.TimeSeries([1,3,-4,5.93932]); v
435
            [1.0000, 3.0000, -4.0000, 5.9393]
436
            sage: v[0] = -5.5; v
437
            [-5.5000, 3.0000, -4.0000, 5.9393]
438
            sage: v[-1] = 3.2; v
439
            [-5.5000, 3.0000, -4.0000, 3.2000]
440
            sage: v[10]
441
            Traceback (most recent call last):
442
            ...
443
            IndexError: TimeSeries index out of range
444
            sage: v[-5]
445
            Traceback (most recent call last):
446
            ...
447
            IndexError: TimeSeries index out of range
448
        """
449
        if i < 0:
450
            i += self._length
451
            if i < 0:
452
                raise IndexError, "TimeSeries index out of range"
453
        elif i >= self._length:
454
            raise IndexError, "TimeSeries index out of range"
455
        self._values[i] = x
456

457
    def __copy__(self):
458
        """
459
        Return a copy of ``self``.
460

461
        EXAMPLES::
462

463
            sage: v = finance.TimeSeries([1,-4,3,-2.5,-4,3])
464
            sage: v.__copy__()
465
            [1.0000, -4.0000, 3.0000, -2.5000, -4.0000, 3.0000]
466
            sage: copy(v)
467
            [1.0000, -4.0000, 3.0000, -2.5000, -4.0000, 3.0000]
468
            sage: copy(v) is v
469
            False
470
        """
471
        cdef Py_ssize_t i
472
        cdef TimeSeries t = new_time_series(self._length)
473
        memcpy(t._values, self._values , sizeof(double)*self._length)
474
        return t
475

476
    def __add__(left, right):
477
        """
478
        Concatenate the time series ``self`` and ``right``.
479

480
        .. NOTE::
481

482
            To add a single number to the entries of a time series,
483
            use the ``add_scalar`` method, and to add componentwise use
484
            the ``add_entries`` method.
485
        
486
        INPUT:
487

488
        - ``right`` -- a time series.
489

490
        OUTPUT:
491

492
        A time series.
493

494
        EXAMPLES::
495

496
            sage: v = finance.TimeSeries([1,2,3]); w = finance.TimeSeries([1,2])
497
            sage: v + w
498
            [1.0000, 2.0000, 3.0000, 1.0000, 2.0000]
499
            sage: v = finance.TimeSeries([1,2,-5]); v
500
            [1.0000, 2.0000, -5.0000]
501

502
        Note that both summands must be a time series::
503

504
            sage: v + xrange(4)
505
            Traceback (most recent call last):
506
            ...
507
            TypeError: right operand must be a time series
508
            sage: [1,5] + v
509
            Traceback (most recent call last):
510
            ...
511
            TypeError: left operand must be a time series
512
        """
513
        if not isinstance(right, TimeSeries):
514
            raise TypeError, "right operand must be a time series"
515
        if not isinstance(left, TimeSeries):
516
            raise TypeError, "left operand must be a time series"
517
        cdef TimeSeries R = right
518
        cdef TimeSeries L = left
519
        cdef TimeSeries t = new_time_series(L._length + R._length)
520
        memcpy(t._values, L._values, sizeof(double)*L._length)
521
        memcpy(t._values + L._length, R._values, sizeof(double)*R._length)
522
        return t
523

524
    def __mul__(left, right):
525
        """
526
        Multiply a time series by an integer n, which (like for lists)
527
        results in the time series concatenated with itself n times.
528

529
        .. NOTE::
530

531
            To multiply all the entries of a time series by a single
532
            scalar, use the ``scale`` method.
533
        
534
        INPUT:
535

536
        - ``left``, ``right`` -- an integer and a time series.
537

538
        OUTPUT:
539

540
        A time series.
541

542
        EXAMPLES::
543

544
            sage: v = finance.TimeSeries([1,2,-5]); v
545
            [1.0000, 2.0000, -5.0000]
546
            sage: v*3
547
            [1.0000, 2.0000, -5.0000, 1.0000, 2.0000, -5.0000, 1.0000, 2.0000, -5.0000]
548
            sage: 3*v
549
            [1.0000, 2.0000, -5.0000, 1.0000, 2.0000, -5.0000, 1.0000, 2.0000, -5.0000]
550
            sage: v*v
551
            Traceback (most recent call last):
552
            ...
553
            TypeError: 'sage.finance.time_series.TimeSeries' object cannot be interpreted as an index
554
        """
555
        cdef Py_ssize_t n, i
556
        cdef TimeSeries T
557
        if isinstance(left, TimeSeries):
558
            T = left
559
            n = right
560
        else:
561
            T = right
562
            n = left
563
        # Make n copies of T concatenated together
564
        cdef TimeSeries v = new_time_series(T._length * n)
565
        for i from 0 <= i < n:
566
            memcpy(v._values + i*T._length, T._values, sizeof(double)*T._length)
567
        return v
568

569

570
    def autoregressive_fit(self,M):
571
        r"""
572
        This method fits the time series to an autoregressive process
573
        of order ``M``. That is, we assume the process is given by
574
        `X_t-\mu=a_1(X_{t-1}-\mu)+a_2(X_{t-1}-\mu)+\cdots+a_M(X_{t-M}-\mu)+Z_t`
575
        where `\mu` is the mean of the process and `Z_t` is noise.
576
        This method returns estimates for `a_1,\dots,a_M`.
577

578
        The method works by solving the Yule-Walker equations
579
        `\Gamma a =\gamma`, where `\gamma=(\gamma(1),\dots,\gamma(M))`,
580
        `a=(a_1,\dots,a_M)`  with `\gamma(i)` the autocovariance of lag `i`
581
        and `\Gamma_{ij}=\gamma(i-j)`.
582
        
583
        
584
        .. WARNING::
585

586
            The input sequence is assumed to be stationary, which
587
            means that the autocovariance `\langle y_j y_k \rangle` depends
588
            only on the difference `|j-k|`.
589

590
        INPUT:
591

592
        - ``M`` -- an integer.
593

594
        OUTPUT:
595

596
        A time series -- the coefficients of the autoregressive process.
597

598
        EXAMPLES::
599

600
            sage: set_random_seed(0)
601
            sage: v = finance.TimeSeries(10^4).randomize('normal').sums()
602
            sage: F = v.autoregressive_fit(100)
603
            sage: v
604
            [0.6767, 0.2756, 0.6332, 0.0469, -0.8897 ... 87.6759, 87.6825, 87.4120, 87.6639, 86.3194]
605
            sage: v.autoregressive_forecast(F)
606
            86.0177285042...
607
            sage: F
608
            [1.0148, -0.0029, -0.0105, 0.0067, -0.0232 ... -0.0106, -0.0068, 0.0085, -0.0131, 0.0092]
609

610
            sage: set_random_seed(0)
611
            sage: t=finance.TimeSeries(2000)
612
            sage: z=finance.TimeSeries(2000)
613
            sage: z.randomize('normal',1)
614
            [1.6767, 0.5989, 1.3576, 0.4136, 0.0635 ... 1.0057, -1.1467, 1.2809, 1.5705, 1.1095]
615
            sage: t[0]=1
616
            sage: t[1]=2
617
            sage: for i in range(2,2000):
618
            ...     t[i]=t[i-1]-0.5*t[i-2]+z[i]
619
            ...            
620
            sage: c=t[0:-1].autoregressive_fit(2)  #recovers recurrence relation
621
            sage: c #should be close to [1,-0.5]
622
            [1.0371, -0.5199]
623
        """
624
        acvs = [self.autocovariance(i) for i in range(M+1)]
625
        return autoregressive_fit(acvs)
626

627
    def autoregressive_forecast(self, filter):
628
        """
629
        Given the autoregression coefficients as outputted by the
630
        ``autoregressive_fit`` command, compute the forecast for the next
631
        term in the series.
632

633
        INPUT:
634

635
        - ``filter`` -- a time series outputted by the ``autoregressive_fit``
636
          command.
637

638
        EXAMPLES::
639

640
            sage: set_random_seed(0)
641
            sage: v = finance.TimeSeries(100).randomize('normal').sums()
642
            sage: F = v[:-1].autoregressive_fit(5); F
643
            [1.0019, -0.0524, -0.0643, 0.1323, -0.0539]
644
            sage: v.autoregressive_forecast(F)
645
            11.7820298611...
646
            sage: v
647
            [0.6767, 0.2756, 0.6332, 0.0469, -0.8897 ... 9.2447, 9.6709, 10.4037, 10.4836, 12.1960]
648
        """
649
        cdef TimeSeries filt
650
        if isinstance(filter, TimeSeries):
651
            filt = filter
652
        else:
653
            filt = TimeSeries(filter)
654
        
655
        cdef double f = 0
656
        cdef Py_ssize_t i
657
        for i from 0 <= i < min(self._length, filt._length):
658
            f += self._values[self._length - i - 1] * filt._values[i]
659
        return f
660

661
    def reversed(self):
662
        """
663
        Return new time series obtain from this time series by
664
        reversing the order of the entries in this time series.
665

666
        OUTPUT:
667

668
        A time series.
669

670
        EXAMPLES::
671

672
            sage: v = finance.TimeSeries([1..5])
673
            sage: v.reversed()
674
            [5.0000, 4.0000, 3.0000, 2.0000, 1.0000]
675
        """
676
        cdef Py_ssize_t i, n = self._length-1
677
        cdef TimeSeries t = new_time_series(self._length)
678

679
        for i from 0 <= i < self._length:
680
            t._values[i] = self._values[n - i]
681
        return t        
682

683
    def extend(self, right):
684
        """
685
        Extend this time series by appending elements from the iterable
686
        ``right``.
687

688
        INPUT:
689

690
        - ``right`` -- iterable that can be converted to a time series.
691

692
        EXAMPLES::
693

694
            sage: v = finance.TimeSeries([1,2,-5]); v
695
            [1.0000, 2.0000, -5.0000]
696
            sage: v.extend([-3.5, 2])
697
            sage: v
698
            [1.0000, 2.0000, -5.0000, -3.5000, 2.0000]
699
        """
700
        if not isinstance(right, TimeSeries):
701
            right = TimeSeries(right)
702
        if len(right) == 0:
703
            return
704
        cdef TimeSeries T = right
705
        cdef double* z = <double*> sage_malloc(sizeof(double)*(self._length + T._length))
706
        if z == NULL:
707
            raise MemoryError
708
        memcpy(z, self._values, sizeof(double)*self._length)
709
        memcpy(z + self._length, T._values, sizeof(double)*T._length)
710
        sage_free(self._values)
711
        self._values = z
712
        self._length = self._length + T._length
713

714
    def list(self):
715
        """
716
        Return list of elements of ``self``.
717

718
        EXAMPLES::
719

720
            sage: v = finance.TimeSeries([1,-4,3,-2.5,-4,3])
721
            sage: v.list()
722
            [1.0, -4.0, 3.0, -2.5, -4.0, 3.0]
723
        """
724
        cdef Py_ssize_t i
725
        return [self._values[i] for i in range(self._length)]
726

727
    def log(self):
728
        """
729
        Return new time series got by taking the logarithms of all the
730
        terms in the time series.
731

732
        OUTPUT:
733

734
        A new time series. 
735

736
        EXAMPLES:
737

738
        We exponentiate then log a time series and get back
739
        the original series::
740

741
            sage: v = finance.TimeSeries([1,-4,3,-2.5,-4,3]); v
742
            [1.0000, -4.0000, 3.0000, -2.5000, -4.0000, 3.0000]
743
            sage: v.exp()
744
            [2.7183, 0.0183, 20.0855, 0.0821, 0.0183, 20.0855]
745
            sage: v.exp().log()
746
            [1.0000, -4.0000, 3.0000, -2.5000, -4.0000, 3.0000]
747

748
        Log of 0 gives ``-inf``::
749

750
            sage: finance.TimeSeries([1,0,3]).log()[1]
751
            -inf
752
        """
753
        cdef Py_ssize_t i
754
        cdef TimeSeries t = new_time_series(self._length)
755
        for i from 0 <= i < self._length:
756
            t._values[i] = log(self._values[i])
757
        return t
758

759
    def exp(self):
760
        """
761
        Return new time series got by applying the exponential map to
762
        all the terms in the time series.
763

764
        OUTPUT:
765

766
        A new time series. 
767

768
        EXAMPLES::
769

770
            sage: v = finance.TimeSeries([1..5]); v
771
            [1.0000, 2.0000, 3.0000, 4.0000, 5.0000]
772
            sage: v.exp()
773
            [2.7183, 7.3891, 20.0855, 54.5982, 148.4132]
774
            sage: v.exp().log()
775
            [1.0000, 2.0000, 3.0000, 4.0000, 5.0000]
776
        """
777
        cdef Py_ssize_t i
778
        cdef TimeSeries t = new_time_series(self._length)
779
        for i from 0 <= i < self._length:
780
            t._values[i] = exp(self._values[i])
781
        return t
782

783
    def abs(self):
784
        """
785
        Return new time series got by replacing all entries
786
        of ``self`` by their absolute value.
787

788
        OUTPUT:
789

790
        A new time series.
791

792
        EXAMPLES::
793

794
            sage: v = finance.TimeSeries([1,3.1908439,-4,5.93932])
795
            sage: v
796
            [1.0000, 3.1908, -4.0000, 5.9393]
797
            sage: v.abs()
798
            [1.0000, 3.1908, 4.0000, 5.9393]
799
        """
800
        cdef Py_ssize_t i
801
        cdef TimeSeries t = new_time_series(self._length)
802
        for i from 0 <= i < self._length:
803
            t._values[i] = self._values[i] if self._values[i] >= 0 else -self._values[i]
804
        return t
805

806
    def diffs(self, Py_ssize_t k = 1):
807
        r"""
808
        Return the new time series got by taking the differences of
809
        successive terms in the time series.  So if ``self`` is the time
810
        series `X_0, X_1, X_2, \dots`, then this function outputs the
811
        series `X_1 - X_0, X_2 - X_1, \dots`.  The output series has one
812
        less term than the input series.  If the optional parameter
813
        `k` is given, return `X_k - X_0, X_{2k} - X_k, \dots`. 
814

815
        INPUT:
816

817
        - ``k`` -- positive integer (default: 1)
818

819
        OUTPUT:
820

821
        A new time series.
822

823
        EXAMPLES::
824

825
            sage: v = finance.TimeSeries([5,4,1.3,2,8]); v
826
            [5.0000, 4.0000, 1.3000, 2.0000, 8.0000]
827
            sage: v.diffs()
828
            [-1.0000, -2.7000, 0.7000, 6.0000]
829
        """
830
        if k != 1:
831
            return self.scale_time(k).diffs()
832
        cdef Py_ssize_t i
833
        cdef TimeSeries t = new_time_series(self._length - 1)
834
        for i from 1 <= i < self._length:
835
            t._values[i-1] = self._values[i] - self._values[i-1]
836
        return t
837

838
    def scale_time(self, Py_ssize_t k):
839
        r"""
840
        Return the new time series at scale ``k``.  If the input
841
        time series is `X_0, X_1, X_2, \dots`, then this function
842
        returns the shorter time series `X_0, X_k, X_{2k}, \dots`.
843

844
        INPUT:
845

846
        - ``k`` -- a positive integer.
847

848
        OUTPUT:
849

850
        A new time series.
851

852
        EXAMPLES::
853

854
            sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
855
            [5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
856
            sage: v.scale_time(1)
857
            [5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
858
            sage: v.scale_time(2)
859
            [5.0000, 1.3000, 8.0000, 3.0000]
860
            sage: v.scale_time(3)
861
            [5.0000, 2.0000]
862
            sage: v.scale_time(10)
863
            []
864

865
        A series of odd length::
866

867
            sage: v = finance.TimeSeries([1..5]); v
868
            [1.0000, 2.0000, 3.0000, 4.0000, 5.0000]
869
            sage: v.scale_time(2)
870
            [1.0000, 3.0000, 5.0000]
871

872
        TESTS::
873

874
            sage: v.scale_time(0)
875
            Traceback (most recent call last):
876
            ...
877
            ValueError: k must be positive
878
            sage: v.scale_time(-1)
879
            Traceback (most recent call last):
880
            ...
881
            ValueError: k must be positive
882
        """
883
        if k <= 0:
884
            raise ValueError, "k must be positive"
885

886
        cdef Py_ssize_t i, n
887
        n = self._length/k
888
        if self._length % 2:
889
            n += 1
890
        cdef TimeSeries t = new_time_series(n)  # in C / is floor division.
891
        for i from 0 <= i < n:
892
            t._values[i] = self._values[i*k]
893
        return t
894

895
    cpdef rescale(self, double s):
896
        """
897
        Change ``self`` by multiplying every value in the series by ``s``.
898

899
        INPUT:
900

901
        - ``s`` -- a float.
902

903
        EXAMPLES::
904

905
            sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
906
            [5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
907
            sage: v.rescale(0.5)
908
            sage: v
909
            [2.5000, 2.0000, 0.6500, 1.0000, 4.0000, 5.0000, 1.5000, -2.5000]
910
        """
911
        for i from 0 <= i < self._length:
912
            self._values[i] = self._values[i] * s
913

914
    def scale(self, double s):
915
        """
916
        Return new time series obtained by multiplying every value in the
917
        series by ``s``.
918

919
        INPUT:
920

921
        - ``s`` -- a float.
922

923
        OUTPUT:
924

925
        A new time series with all values multiplied by ``s``.
926

927
        EXAMPLES::
928

929
            sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
930
            [5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
931
            sage: v.scale(0.5)
932
            [2.5000, 2.0000, 0.6500, 1.0000, 4.0000, 5.0000, 1.5000, -2.5000]
933
        """
934
        cdef TimeSeries t = new_time_series(self._length)
935
        for i from 0 <= i < self._length:
936
            t._values[i] = self._values[i] * s
937
        return t
938

939
    def add_scalar(self, double s):
940
        """
941
        Return new time series obtained by adding a scalar to every
942
        value in the series.
943

944
        .. NOTE::
945

946
            To add componentwise, use the ``add_entries`` method.  
947

948
        INPUT:
949

950
        - ``s`` -- a float.
951

952
        OUTPUT:
953

954
        A new time series with ``s`` added to all values.
955

956
        EXAMPLES::
957

958
            sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
959
            [5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
960
            sage: v.add_scalar(0.5)
961
            [5.5000, 4.5000, 1.8000, 2.5000, 8.5000, 10.5000, 3.5000, -4.5000]
962
        """
963
        cdef TimeSeries t = new_time_series(self._length)
964
        for i from 0 <= i < self._length:
965
            t._values[i] = self._values[i] + s
966
        return t
967

968
    def add_entries(self, t):
969
        """
970
        Add corresponding entries of ``self`` and ``t`` together,
971
        extending either ``self`` or ``t`` by 0's if they do
972
        not have the same length.
973

974
        .. NOTE::
975

976
            To add a single number to the entries of a time series,
977
            use the ``add_scalar`` method.
978

979
        INPUT:
980

981
        - ``t`` -- a time series.
982

983
        OUTPUT:
984

985
        A time series with length the maxima of the lengths of
986
        ``self`` and ``t``.
987
            
988
        EXAMPLES::
989

990
            sage: v = finance.TimeSeries([1,2,-5]); v
991
            [1.0000, 2.0000, -5.0000]
992
            sage: v.add_entries([3,4])
993
            [4.0000, 6.0000, -5.0000]
994
            sage: v.add_entries(v)
995
            [2.0000, 4.0000, -10.0000]
996
            sage: v.add_entries([3,4,7,18.5])
997
            [4.0000, 6.0000, 2.0000, 18.5000]
998
        """
999
        if not PY_TYPE_CHECK(t, TimeSeries):
1000
            t = TimeSeries(t)
1001
        cdef Py_ssize_t i, n
1002
        cdef TimeSeries T = t, shorter, longer
1003
        if self._length > T._length:
1004
            shorter = T
1005
            longer = self
1006
        else:
1007
            shorter = self
1008
            longer = T
1009
        
1010
        n = longer._length
1011
        cdef TimeSeries v = new_time_series(n)
1012

1013
        # Iterate up to the length of the shorter one
1014
        for i from 0 <= i < shorter._length:
1015
            v._values[i] = shorter._values[i] + longer._values[i]
1016

1017
        # Copy over the rest of the values
1018
        if shorter._length != n:
1019
            memcpy(v._values + shorter._length,
1020
                   longer._values + shorter._length,
1021
                   sizeof(double)*(v._length - shorter._length))
1022

1023
        return v
1024

1025
    def show(self, *args, **kwds):
1026
        """
1027
        Calls plot and passes all arguments onto the plot function.  This is
1028
        thus just an alias for plot.
1029

1030
        EXAMPLES:
1031

1032
        Draw a plot of a time series::
1033

1034
            sage: finance.TimeSeries([1..10]).show()
1035
        """
1036
        return self.plot(*args, **kwds)
1037

1038
    def plot(self, Py_ssize_t plot_points=1000, points=False, **kwds):
1039
        r"""
1040
        Return a plot of this time series as a line or points through
1041
        `(i,T(i))`, where `i` ranges over nonnegative integers up to the
1042
        length of ``self``.
1043

1044
        INPUT:
1045

1046
        - ``plot_points`` -- (default: 1000) 0 or positive integer. Only
1047
          plot the given number of equally spaced points in the time series.
1048
          If 0, plot all points.
1049

1050
        - ``points`` -- bool (default: ``False``). If ``True``, return just
1051
          the points of the time series.
1052

1053
        - ``**kwds`` -- passed to the line or point command.
1054
            
1055
        EXAMPLES::
1056

1057
            sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
1058
            [5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
1059
            sage: v.plot()
1060
            sage: v.plot(points=True)
1061
            sage: v.plot() + v.plot(points=True, rgbcolor='red')
1062
            sage: v.plot() + v.plot(points=True, rgbcolor='red', pointsize=50)
1063
        """
1064
        from sage.plot.all import line, point
1065
        cdef Py_ssize_t s
1066

1067
        if self._length < plot_points:
1068
            plot_points = 0
1069
        
1070
        if plot_points > 0:
1071
            s = self._length/plot_points
1072
            if plot_points <= 0:
1073
                raise ValueError, "plot_points must be a positive integer"
1074
            v = self.scale_time(s).list()[:plot_points]
1075
        else:
1076
            s = 1
1077
            v = self.list()
1078
        w = [(i * s, y) for i,y in enumerate(v)]
1079
        if points:
1080
            L = point(w, **kwds)            
1081
        else:
1082
            L = line(w, **kwds)
1083
        L.axes_range(ymin=min(v), ymax=max(v), xmin=0, xmax=len(v)*s)
1084
        return L
1085

1086
    def simple_moving_average(self, Py_ssize_t k):
1087
        """
1088
        Return the moving average time series over the last ``k`` time units.
1089
        Assumes the input time series was constant with its starting value
1090
        for negative time.  The t-th step of the output is the sum of
1091
        the previous ``k - 1`` steps of ``self`` and the ``k``-th step
1092
        divided by ``k``. Thus ``k`` values are averaged at each point.
1093

1094
        INPUT:
1095

1096
        - ``k`` -- positive integer.
1097

1098
        OUTPUT:
1099

1100
        A time series with the same number of steps as ``self``.
1101

1102
        EXAMPLES::
1103

1104
            sage: v = finance.TimeSeries([1,1,1,2,3]); v
1105
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1106
            sage: v.simple_moving_average(0)
1107
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1108
            sage: v.simple_moving_average(1)
1109
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1110
            sage: v.simple_moving_average(2)
1111
            [1.0000, 1.0000, 1.0000, 1.5000, 2.5000]
1112
            sage: v.simple_moving_average(3)
1113
            [1.0000, 1.0000, 1.0000, 1.3333, 2.0000]
1114
        """
1115
        if k == 0 or k == 1:
1116
            return self.__copy__()
1117
        if k <= 0:
1118
            raise ValueError, "k must be positive"
1119
        cdef Py_ssize_t i
1120
        cdef TimeSeries t = new_time_series(self._length)
1121
        if self._length == 0:
1122
            return t
1123

1124
        cdef double s = self._values[0] * k  
1125
        for i from 0 <= i < self._length:
1126
            if i >= k:
1127
                s -= self._values[i-k]
1128
            else:
1129
                s -= self._values[0]
1130
            # I have a slight concern about accumulated rounding error given how
1131
            # this algorithm adds and subtracts.
1132
            s += self._values[i]
1133
            t._values[i] = s/k
1134
        return t
1135

1136
    def exponential_moving_average(self, double alpha):
1137
        """
1138
        Return the exponential moving average time series.  Assumes
1139
        the input time series was constant with its starting value for
1140
        negative time.  The t-th step of the output is the sum of the
1141
        previous k-1 steps of ``self`` and the k-th step divided by k.
1142

1143
        The 0-th term is formally undefined, so we define it to be 0,
1144
        and we define the first term to be ``self[0]``.
1145

1146
        INPUT:
1147

1148
        - ``alpha`` -- float; a smoothing factor with ``0 <= alpha <= 1``.
1149

1150
        OUTPUT:
1151

1152
        A time series with the same number of steps as ``self``.
1153

1154
        EXAMPLES::
1155

1156
            sage: v = finance.TimeSeries([1,1,1,2,3]); v
1157
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1158
            sage: v.exponential_moving_average(0)
1159
            [0.0000, 1.0000, 1.0000, 1.0000, 1.0000]
1160
            sage: v.exponential_moving_average(1)
1161
            [0.0000, 1.0000, 1.0000, 1.0000, 2.0000]
1162
            sage: v.exponential_moving_average(0.5)
1163
            [0.0000, 1.0000, 1.0000, 1.0000, 1.5000]
1164

1165
        Some more examples::
1166

1167
            sage: v = finance.TimeSeries([1,2,3,4,5])
1168
            sage: v.exponential_moving_average(1)
1169
            [0.0000, 1.0000, 2.0000, 3.0000, 4.0000]
1170
            sage: v.exponential_moving_average(0)
1171
            [0.0000, 1.0000, 1.0000, 1.0000, 1.0000]
1172
        """
1173
        if alpha < 0 or alpha > 1:
1174
            raise ValueError, "alpha must be between 0 and 1"
1175
        cdef Py_ssize_t i
1176
        cdef TimeSeries t = new_time_series(self._length)
1177
        if self._length == 0:
1178
            return t
1179
        t._values[0] = 0
1180
        if self._length == 1:
1181
            return t
1182
        t._values[1] = self._values[0]
1183
        for i from 2 <= i < self._length:
1184
            t._values[i] = alpha * self._values[i-1] + (1-alpha) *t._values[i-1]
1185
        return t
1186

1187
    def sums(self, double s=0):
1188
        """
1189
        Return the new time series got by taking the running partial
1190
        sums of the terms of this time series.
1191

1192
        INPUT:
1193

1194
        - ``s`` -- starting value for partial sums.
1195

1196
        OUTPUT:
1197

1198
        A time series.
1199

1200
        EXAMPLES::
1201

1202
            sage: v = finance.TimeSeries([1,1,1,2,3]); v
1203
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1204
            sage: v.sums()
1205
            [1.0000, 2.0000, 3.0000, 5.0000, 8.0000]
1206
        """
1207
        cdef Py_ssize_t i
1208
        cdef TimeSeries t = new_time_series(self._length) 
1209
        for i from 0 <= i < self._length:
1210
            s += self._values[i]
1211
            t._values[i] = s
1212
        return t
1213

1214
    cpdef double sum(self):
1215
        """
1216
        Return the sum of all the entries of ``self``.  If ``self`` has
1217
        length 0, returns 0.
1218

1219
        OUTPUT:
1220

1221
        A double.
1222

1223
        EXAMPLES::
1224

1225
            sage: v = finance.TimeSeries([1,1,1,2,3]); v
1226
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1227
            sage: v.sum()
1228
            8.0
1229
        """
1230
        cdef double s = 0
1231
        cdef Py_ssize_t i
1232
        for i from 0 <= i < self._length:
1233
            s += self._values[i]
1234
        return s
1235

1236
    def prod(self):
1237
        """
1238
        Return the product of all the entries of ``self``.  If ``self`` has
1239
        length 0, returns 1. 
1240

1241
        OUTPUT:
1242

1243
        A double.
1244

1245
        EXAMPLES::
1246

1247
            sage: v = finance.TimeSeries([1,1,1,2,3]); v
1248
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1249
            sage: v.prod()
1250
            6.0
1251
        """
1252
        cdef double s = 1
1253
        cdef Py_ssize_t i
1254
        for i from 0 <= i < self._length:
1255
            s *= self._values[i]
1256
        return s
1257
        
1258
    
1259
    def mean(self):
1260
        """
1261
        Return the mean (average) of the elements of ``self``.
1262

1263
        OUTPUT:
1264

1265
        A double.
1266
            
1267
        EXAMPLES::
1268

1269
            sage: v = finance.TimeSeries([1,1,1,2,3]); v
1270
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1271
            sage: v.mean()
1272
            1.6
1273
        """
1274
        return self.sum() / self._length
1275

1276
    def pow(self, double k):
1277
        """
1278
        Return a new time series with every elements of ``self`` raised to the
1279
        k-th power.
1280

1281
        INPUT:
1282

1283
        - ``k`` -- a float.
1284

1285
        OUTPUT:
1286

1287
        A time series.
1288

1289
        EXAMPLES::
1290

1291
            sage: v = finance.TimeSeries([1,1,1,2,3]); v
1292
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1293
            sage: v.pow(2)
1294
            [1.0000, 1.0000, 1.0000, 4.0000, 9.0000]        
1295
        """
1296
        cdef Py_ssize_t i
1297
        cdef TimeSeries t = new_time_series(self._length)
1298
        for i from 0 <= i < self._length:
1299
            t._values[i] = pow(self._values[i], k)
1300
        return t
1301

1302
    def moment(self, int k):
1303
        """
1304
        Return the k-th moment of ``self``, which is just the
1305
        mean of the k-th powers of the elements of ``self``. 
1306

1307
        INPUT:
1308

1309
        - ``k`` -- a positive integer.
1310

1311
        OUTPUT:
1312

1313
        A double.
1314
            
1315
        EXAMPLES::
1316

1317
            sage: v = finance.TimeSeries([1,1,1,2,3]); v
1318
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1319
            sage: v.moment(1)
1320
            1.6
1321
            sage: v.moment(2)
1322
            3.2
1323
        """
1324
        if k <= 0:
1325
            raise ValueError, "k must be positive"
1326
        if k == 1:
1327
            return self.mean()
1328
        cdef double s = 0
1329
        cdef Py_ssize_t i
1330
        for i from 0 <= i < self._length:
1331
            s += pow(self._values[i], k)
1332
        return s / self._length
1333

1334
    def central_moment(self, int k):
1335
        """
1336
        Return the k-th central moment of ``self``, which is just the mean
1337
        of the k-th powers of the differences ``self[i] - mu``, where ``mu`` is
1338
        the mean of ``self``.
1339

1340
        INPUT:
1341

1342
        - ``k`` -- a positive integer.
1343

1344
        OUTPUT:
1345

1346
        A double.
1347

1348
        EXAMPLES::
1349

1350
            sage: v = finance.TimeSeries([1,2,3])
1351
            sage: v.central_moment(2)
1352
            0.6666666666666666
1353

1354
        Note that the central moment is different from the moment
1355
        here, since the mean is not `0`::
1356

1357
            sage: v.moment(2)     # doesn't subtract off mean
1358
            4.666666666666667
1359

1360
        We compute the central moment directly::
1361

1362
            sage: mu = v.mean(); mu
1363
            2.0
1364
            sage: ((1-mu)^2 + (2-mu)^2 + (3-mu)^2) / 3
1365
            0.6666666666666666
1366
        """
1367
        if k == 1:
1368
            return float(0)
1369
        mu = self.mean()
1370
        # We could make this slightly faster by doing the add scalar
1371
        # and moment calculation together.  But that would be nasty.
1372
        return self.add_scalar(-mu).moment(k)
1373

1374
    def covariance(self, TimeSeries other):
1375
        r"""
1376
        Return the covariance of the time series ``self`` and ``other``.
1377

1378
        INPUT:
1379

1380
        - ``self``, ``other`` -- time series.
1381

1382
        Whichever time series has more terms is truncated.
1383
        
1384
        EXAMPLES::
1385

1386
            sage: v = finance.TimeSeries([1,-2,3]); w = finance.TimeSeries([4,5,-10])
1387
            sage: v.covariance(w)
1388
            -11.777777777777779
1389
        """
1390
        cdef double mu = self.mean(), mu2 = other.mean()
1391
        cdef double s = 0
1392
        cdef Py_ssize_t i
1393
        cdef Py_ssize_t n = min(self._length, other._length)
1394
        for i from 0 <= i < n:
1395
            s += (self._values[i] - mu)*(other._values[i] - mu2)
1396
        return s / n
1397
        # NOTE: There is also a formula
1398
        #       self._values[i]*other._values[i] - mu*mu2
1399
        # but that seems less numerically stable (?), and when tested
1400
        # was not noticeably faster.
1401

1402
    def autocovariance(self, Py_ssize_t k=0):
1403
        r"""
1404
        Return the k-th autocovariance function `\gamma(k)` of ``self``.
1405
        This is the covariance of ``self`` with ``self`` shifted by `k`, i.e.,
1406

1407
        .. MATH::
1408

1409
            \left.
1410
            \left( \sum_{t=0}^{n-k-1} (x_t - \mu)(x_{t + k} - \mu) \right) 
1411
            \right/ n,
1412

1413
        where `n` is the length of ``self``. 
1414

1415
        Note the denominator of `n`, which gives a "better" sample
1416
        estimator.
1417

1418
        INPUT:
1419

1420
        - ``k`` -- a nonnegative integer (default: 0)
1421

1422
        OUTPUT:
1423

1424
        A float.
1425

1426
        EXAMPLES::
1427

1428
            sage: v = finance.TimeSeries([13,8,15,4,4,12,11,7,14,12])
1429
            sage: v.autocovariance(0)
1430
            14.4
1431
            sage: mu = v.mean(); sum([(a-mu)^2 for a in v])/len(v)
1432
            14.4
1433
            sage: v.autocovariance(1)
1434
            -2.7
1435
            sage: mu = v.mean(); sum([(v[i]-mu)*(v[i+1]-mu) for i in range(len(v)-1)])/len(v)
1436
            -2.7
1437
            sage: v.autocovariance(1)
1438
            -2.7
1439

1440
        We illustrate with a random sample that an independently and
1441
        identically distributed distribution with zero mean and
1442
        variance `\sigma^2` has autocovariance function `\gamma(h)`
1443
        with `\gamma(0) = \sigma^2` and `\gamma(h) = 0` for `h\neq 0`. ::
1444

1445
            sage: set_random_seed(0)
1446
            sage: v = finance.TimeSeries(10^6)
1447
            sage: v.randomize('normal', 0, 5)
1448
            [3.3835, -2.0055, 1.7882, -2.9319, -4.6827 ... -5.1868, 9.2613, 0.9274, -6.2282, -8.7652]
1449
            sage: v.autocovariance(0)
1450
            24.95410689...
1451
            sage: v.autocovariance(1)
1452
            -0.00508390...
1453
            sage: v.autocovariance(2)
1454
            0.022056325...
1455
            sage: v.autocovariance(3)
1456
            -0.01902000...
1457
        """
1458
        cdef double mu = self.mean()
1459
        cdef double s = 0
1460
        cdef Py_ssize_t i
1461
        cdef Py_ssize_t n = self._length - k
1462
        for i from 0 <= i < n:
1463
            s += (self._values[i] - mu)*(self._values[i+k] - mu)
1464
        return s / self._length
1465

1466
    def correlation(self, TimeSeries other):
1467
        """
1468
        Return the correlation of ``self`` and ``other``, which is the
1469
        covariance of ``self`` and ``other`` divided by the product of their
1470
        standard deviation.
1471

1472
        INPUT:
1473

1474
        - ``self``, ``other`` -- time series. 
1475
            
1476
        Whichever time series has more terms is truncated.
1477

1478
        EXAMPLES::
1479

1480
            sage: v = finance.TimeSeries([1,-2,3]); w = finance.TimeSeries([4,5,-10])
1481
            sage: v.correlation(w)
1482
            -0.558041609...
1483
            sage: v.covariance(w)/(v.standard_deviation() * w.standard_deviation())
1484
            -0.558041609...
1485
        """
1486
        return self.covariance(other) / (self.standard_deviation() * other.standard_deviation())
1487

1488
    def autocorrelation(self, Py_ssize_t k=1):
1489
        r"""
1490
        Return the k-th sample autocorrelation of this time series
1491
        `x_i`.
1492

1493
        Let `\mu` be the sample mean.  Then the sample autocorrelation
1494
        function is
1495

1496
        .. MATH::
1497

1498
            \frac{\sum_{t=0}^{n-k-1} (x_t - \mu)(x_{t+k} - \mu) }
1499
                 {\sum_{t=0}^{n-1}   (x_t - \mu)^2}.
1500

1501
        Note that the variance must be nonzero or you will get a
1502
        ``ZeroDivisionError``.
1503

1504
        INPUT:
1505

1506
        - ``k`` -- a nonnegative integer (default: 1)
1507
        
1508
        OUTPUT:
1509

1510
        A time series.
1511

1512
        EXAMPLE::
1513

1514
            sage: v = finance.TimeSeries([13,8,15,4,4,12,11,7,14,12])
1515
            sage: v.autocorrelation()
1516
            -0.1875
1517
            sage: v.autocorrelation(1)
1518
            -0.1875
1519
            sage: v.autocorrelation(0)
1520
            1.0
1521
            sage: v.autocorrelation(2)
1522
            -0.20138888888888887
1523
            sage: v.autocorrelation(3)
1524
            0.18055555555555555
1525
            
1526
            sage: finance.TimeSeries([1..1000]).autocorrelation()
1527
            0.997
1528
        """
1529
        return self.autocovariance(k) / self.variance(bias=True)
1530

1531
    def variance(self, bias=False):
1532
        """
1533
        Return the variance of the elements of ``self``, which is the mean
1534
        of the squares of the differences from the mean.
1535

1536
        INPUT:
1537

1538
        - ``bias`` -- bool (default: ``False``); if ``False``, divide by
1539
          ``self.length() - 1`` instead of ``self.length()`` to give a less
1540
          biased estimator for the variance.
1541

1542
        OUTPUT:
1543

1544
        A double.
1545

1546
        EXAMPLE::
1547

1548
            sage: v = finance.TimeSeries([1,1,1,2,3]); v
1549
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1550
            sage: v.variance()
1551
            0.8
1552
            sage: v.variance(bias=True)
1553
            0.64
1554

1555
        TESTS::
1556

1557
            sage: finance.TimeSeries([1]).variance()
1558
            0.0
1559
            sage: finance.TimeSeries([]).variance()
1560
            0.0
1561
        """
1562
        if self._length <= 1:
1563
            return float(0)
1564
        cdef double mu = self.mean()
1565
        cdef double s = 0
1566
        cdef double a
1567
        cdef Py_ssize_t i
1568
        for i from 0 <= i < self._length:
1569
            a = self._values[i] - mu
1570
            s += a * a
1571
        if bias:
1572
            return s / self._length
1573
        else:
1574
            return s / (self._length - 1)
1575
    
1576
    def standard_deviation(self, bias=False):
1577
        """
1578
        Return the standard deviation of the entries of ``self``.
1579

1580
        INPUT:
1581

1582
        - ``bias`` -- bool (default: ``False``); if ``False``, divide by
1583
          ``self.length() - 1`` instead of ``self.length()`` to give a less
1584
          biased estimator for the variance.
1585

1586
        OUTPUT:
1587

1588
        A double.
1589

1590
        EXAMPLES::
1591

1592
            sage: v = finance.TimeSeries([1,1,1,2,3]); v
1593
            [1.0000, 1.0000, 1.0000, 2.0000, 3.0000]
1594
            sage: v.standard_deviation()
1595
            0.8944271909...
1596
            sage: v.standard_deviation(bias=True)
1597
            0.8
1598

1599
        TESTS::
1600

1601
            sage: finance.TimeSeries([1]).standard_deviation()
1602
            0.0
1603
            sage: finance.TimeSeries([]).standard_deviation()
1604
            0.0
1605
        """
1606
        return sqrt(self.variance(bias=bias))
1607

1608
    def range_statistic(self, b=None):
1609
        r"""
1610
        Return the rescaled range statistic `R/S` of ``self``, which is
1611
        defined as follows (see Hurst 1951).  If the optional
1612
        parameter ``b`` is given, return the average of `R/S` range
1613
        statistics of disjoint blocks of size ``b``.
1614

1615
        Let `\sigma` be the standard deviation of the sequence of
1616
        differences of ``self``, and let `Y_k` be the k-th term of ``self``.
1617
        Let `n` be the number of terms of ``self``, and set
1618
        `Z_k = Y_k - ((k+1)/n) \cdot Y_n`. Then
1619

1620
        .. MATH::
1621

1622
            R/S = \big( \max(Z_k) - \min(Z_k) \big) / \sigma
1623

1624
        where the max and min are over all `Z_k`.
1625
        Basically replacing `Y_k` by `Z_k` allows us to measure
1626
        the difference from the line from the origin to `(n,Y_n)`.
1627

1628
        INPUT:
1629

1630
        - ``self`` -- a time series  (*not* the series of differences).
1631

1632
        - ``b`` -- integer (default: ``None``); if given instead divide the
1633
          input time series up into ``j = floor(n/b)`` disjoint
1634
          blocks, compute the `R/S` statistic for each block,
1635
          and return the average of those `R/S` statistics.
1636

1637
        OUTPUT:
1638

1639
        A float.
1640

1641
        EXAMPLES:
1642

1643
        Notice that if we make a Brownian motion random walk, there
1644
        is no difference if we change the standard deviation. ::
1645

1646
            sage: set_random_seed(0); finance.TimeSeries(10^6).randomize('normal').sums().range_statistic()
1647
            1897.8392602...
1648
            sage: set_random_seed(0); finance.TimeSeries(10^6).randomize('normal',0,100).sums().range_statistic()
1649
            1897.8392602...
1650
        """
1651
        cdef Py_ssize_t j, k, n = self._length
1652

1653
        if b is not None:
1654
            j = n // b
1655
            return sum([self[k*b:(k+1)*b].add_scalar(-self[k*b]).range_statistic() for k in range(j)]) / j
1656

1657
        cdef double Yn = self._values[n - 1]
1658
        cdef TimeSeries Z = self.__copy__()
1659
        for k from 0 <= k < n:
1660
            Z._values[k] -= (k+1)*Yn / n
1661

1662
        sigma = self.diffs().standard_deviation()
1663
        
1664
        return (Z.max() - Z.min()) / sigma
1665

1666
    def hurst_exponent(self):
1667
        """
1668
        Returns an estimate of the Hurst exponent of this time series.
1669
        We use the algorithm from pages 61 -- 63 of [Peteres, Fractal
1670
        Market Analysis (1994); see Google Books].
1671

1672
        We define the Hurst exponent of a constant time series to be 1.
1673

1674
        EXAMPLES:
1675

1676
        The Hurst exponent of Brownian motion is 1/2.  We approximate
1677
        it with some specific samples.  Note that the estimator is
1678
        biased and slightly overestimates. ::
1679

1680
            sage: set_random_seed(0)
1681
            sage: bm = finance.TimeSeries(10^5).randomize('normal').sums(); bm
1682
            [0.6767, 0.2756, 0.6332, 0.0469, -0.8897 ... 152.2437, 151.5327, 152.7629, 152.9169, 152.9084]
1683
            sage: bm.hurst_exponent()
1684
            0.527450972...
1685

1686
        We compute the Hurst exponent of a simulated fractional Brownian
1687
        motion with Hurst parameter 0.7.  This function estimates the
1688
        Hurst exponent as 0.706511951... ::
1689

1690
            sage: set_random_seed(0)
1691
            sage: fbm = finance.fractional_brownian_motion_simulation(0.7,0.1,10^5,1)[0]
1692
            sage: fbm.hurst_exponent()
1693
            0.706511951...
1694

1695
        Another example with small Hurst exponent (notice the overestimation).
1696

1697
        ::
1698

1699
            sage: fbm = finance.fractional_brownian_motion_simulation(0.2,0.1,10^5,1)[0]
1700
            sage: fbm.hurst_exponent()
1701
            0.278997441...
1702

1703
        We compute the mean Hurst exponent of 100 simulated multifractal
1704
        cascade random walks::
1705

1706
            sage: set_random_seed(0)
1707
            sage: y = finance.multifractal_cascade_random_walk_simulation(3700,0.02,0.01,0.01,1000,100)
1708
            sage: finance.TimeSeries([z.hurst_exponent() for z in y]).mean()
1709
            0.579848225779347...
1710

1711
        We compute the mean Hurst exponent of 100 simulated Markov switching
1712
        multifractal time series.  The Hurst exponent is quite small. ::
1713

1714
            sage: set_random_seed(0)
1715
            sage: msm = finance.MarkovSwitchingMultifractal(8,1.4,0.5,0.95,3)
1716
            sage: y = msm.simulations(1000,100)
1717
            sage: finance.TimeSeries([z.hurst_exponent() for z in y]).mean()
1718
            0.286102325623705...
1719
        """
1720
        # We use disjoint blocks of size 8, 16, 32, ....
1721
        cdef Py_ssize_t k = 8
1722
        if self._length <= 32:   # small data, estimate of Hurst will suck but...
1723
            k = 4
1724
        v0 = []
1725
        v1 = []
1726
        while k <= self._length:
1727
            try:
1728
                v1.append(log(self.range_statistic(k)))
1729
                v0.append(log(k))
1730
            except ZeroDivisionError:   # 0 standard deviation
1731
                pass
1732
            k *= 2
1733
        if len(v0) == 0:
1734
            return float(1)
1735
        if len(v0) == 1:
1736
            return v1[0]/v0[0]
1737
        import numpy
1738
        coeffs = numpy.polyfit(v0,v1,1)
1739
        return coeffs[0]
1740

1741
    def min(self, bint index=False):
1742
        """
1743
        Return the smallest value in this time series. If this series
1744
        has length 0 we raise a ``ValueError``.
1745

1746
        INPUT:
1747

1748
        - ``index`` -- bool (default: ``False``); if ``True``, also return
1749
          index of minimal entry.
1750

1751
        OUTPUT:
1752

1753
        - float -- smallest value.
1754

1755
        - integer -- index of smallest value; only returned if ``index=True``.
1756

1757
        EXAMPLES::
1758

1759
            sage: v = finance.TimeSeries([1,-4,3,-2.5,-4])
1760
            sage: v.min()
1761
            -4.0
1762
            sage: v.min(index=True)
1763
            (-4.0, 1)
1764
        """
1765
        if self._length == 0:
1766
            raise ValueError, "min() arg is an empty sequence"
1767
        cdef Py_ssize_t i, j
1768
        cdef double s = self._values[0]
1769
        j = 0
1770
        for i from 1 <= i < self._length:
1771
            if self._values[i] < s:
1772
                s = self._values[i]
1773
                j = i
1774
        if index:
1775
            return s, j
1776
        else:
1777
            return s
1778
            
1779
    def max(self, bint index=False):
1780
        """
1781
        Return the largest value in this time series. If this series
1782
        has length 0 we raise a ``ValueError``.
1783

1784
        INPUT:
1785

1786
        - ``index`` -- bool (default: ``False``); if ``True``, also return
1787
          index of maximum entry.
1788

1789
        OUTPUT:
1790

1791
        - float -- largest value.
1792

1793
        - integer -- index of largest value; only returned if ``index=True``.
1794

1795
        EXAMPLES::
1796

1797
            sage: v = finance.TimeSeries([1,-4,3,-2.5,-4,3])
1798
            sage: v.max()
1799
            3.0
1800
            sage: v.max(index=True)
1801
            (3.0, 2)
1802
        """
1803
        if self._length == 0:
1804
            raise ValueError, "max() arg is an empty sequence"
1805
        cdef Py_ssize_t i, j = 0
1806
        cdef double s = self._values[0]
1807
        for i from 1 <= i < self._length:
1808
            if self._values[i] > s:
1809
                s = self._values[i]
1810
                j = i
1811
        if index:
1812
            return s, j
1813
        else:
1814
            return s
1815

1816
    def clip_remove(self, min=None, max=None):
1817
        """
1818
        Return new time series obtained from ``self`` by removing all
1819
        values that are less than or equal to a certain minimum value
1820
        or greater than or equal to a certain maximum.
1821

1822
        INPUT:
1823

1824
        - ``min`` -- (default: ``None``) ``None`` or double.
1825

1826
        - ``max`` -- (default: ``None``) ``None`` or double.
1827

1828
        OUTPUT:
1829

1830
        A time series.
1831

1832
        EXAMPLES::
1833

1834
            sage: v = finance.TimeSeries([1..10])
1835
            sage: v.clip_remove(3,7)
1836
            [3.0000, 4.0000, 5.0000, 6.0000, 7.0000]
1837
            sage: v.clip_remove(3)
1838
            [3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000]
1839
            sage: v.clip_remove(max=7)
1840
            [1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000]
1841
        """
1842
        cdef Py_ssize_t i, j
1843
        cdef TimeSeries t
1844
        cdef double mn, mx
1845
        cdef double x
1846
        if min is None and max is None:
1847
            return self.copy()
1848
        # This code is ugly but I think as fast as possible.
1849
        if min is None and max is not None:
1850
            # Return everything <= max
1851
            j = 0
1852
            mx = max
1853
            for i from 0 <= i < self._length:
1854
                if self._values[i] <= mx:
1855
                    j += 1
1856
                    
1857
            t = TimeSeries(j)
1858
            j = 0
1859
            for i from 0 <= i < self._length:
1860
                if self._values[i] <= mx:
1861
                    t._values[j] = self._values[i]
1862
                    j += 1
1863
        elif max is None and min is not None:
1864
            # Return everything >= min
1865
            j = 0
1866
            mn = min
1867
            for i from 0 <= i < self._length:
1868
                if self._values[i] >= mn:
1869
                    j += 1
1870
            t = TimeSeries(j)
1871
            j = 0
1872
            for i from 0 <= i < self._length:
1873
                if self._values[i] >= mn:
1874
                    t._values[j] = self._values[i]
1875
                    j += 1
1876
        else:
1877
            # Return everything between min and max
1878
            j = 0
1879
            mn = min; mx = max
1880
            for i from 0 <= i < self._length:
1881
                x = self._values[i]
1882
                if x >= mn and x <= mx:
1883
                    j += 1
1884
            t = TimeSeries(j)
1885
            j = 0
1886
            for i from 0 <= i < self._length:
1887
                x = self._values[i]
1888
                if x >= mn and x <= mx:
1889
                    t._values[j] = x
1890
                    j += 1
1891
        return t
1892

1893
    def histogram(self, Py_ssize_t bins=50, bint normalize=False):
1894
        """
1895
        Return the frequency histogram of the values in
1896
        this time series divided up into the given
1897
        number of bins.
1898

1899
        INPUT:
1900

1901
        - ``bins`` -- a positive integer (default: 50)
1902

1903
        - ``normalize`` -- (default: ``False``) whether to normalize so the
1904
          total area in the bars of the histogram is 1. 
1905

1906
        OUTPUT:
1907

1908
        - counts -- list of counts of numbers of elements in
1909
          each bin.
1910

1911
        - endpoints -- list of 2-tuples (a,b) that give the
1912
          endpoints of the bins.
1913
                      
1914
        EXAMPLES::
1915

1916
            sage: v = finance.TimeSeries([5,4,1.3,2,8,10,3,-5]); v
1917
            [5.0000, 4.0000, 1.3000, 2.0000, 8.0000, 10.0000, 3.0000, -5.0000]
1918
            sage: v.histogram(3)
1919
            ([1, 4, 3], [(-5.0, 0.0), (0.0, 5.0), (5.0, 10.0)])
1920
        """
1921
        if bins <= 0:
1922
            raise ValueError, "bins must be positive"
1923
        
1924
        cdef double mn = self.min(), mx = self.max()
1925
        cdef double r = mx - mn, step = r/bins
1926
        cdef Py_ssize_t j
1927

1928
        if r == 0:
1929
            raise ValueError, "bins have 0 width"
1930

1931
        v = [(mn + j*step, mn + (j+1)*step) for j in range(bins)]
1932
        if self._length == 0:
1933
            return [], v
1934
        
1935
        if step == 0:
1936
            counts = [0]*bins
1937
            counts[0] = [self._length]
1938
            return counts, v
1939

1940
        cdef Py_ssize_t i
1941
        cdef Py_ssize_t* cnts = <Py_ssize_t*>sage_malloc(sizeof(Py_ssize_t)*bins)
1942
        if cnts == NULL:
1943
            raise MemoryError
1944
        for i from 0 <= i < bins:
1945
            cnts[i] = 0
1946

1947
        for i from 0 <= i < self._length:
1948
            j = int((self._values[i] - mn)/step)
1949
            if j >= bins:
1950
                j = bins-1
1951
            cnts[j] += 1
1952

1953
        b = 1.0/(self._length * step)
1954
        if normalize:
1955
            counts = [cnts[i]*b for i in range(bins)]
1956
        else:
1957
            counts = [cnts[i] for i in range(bins)]
1958
        sage_free(cnts)
1959

1960
        return counts, v
1961

1962
    def plot_histogram(self, bins=50, normalize=True, **kwds):
1963
        """
1964
        Return histogram plot of this time series with given number of bins.
1965
                
1966
        INPUT:
1967

1968
        - ``bins`` -- positive integer (default: 50)
1969

1970
        - ``normalize`` -- (default: ``True``) whether to normalize so the
1971
          total area in the bars of the histogram is 1. 
1972

1973
        OUTPUT:
1974

1975
        A histogram plot.
1976

1977
        EXAMPLES::
1978

1979
            sage: v = finance.TimeSeries([1..50])
1980
            sage: v.plot_histogram(bins=10)
1981
        """
1982
        from sage.plot.all import polygon
1983
        counts, intervals = self.histogram(bins, normalize=normalize)
1984
        s = 0
1985
        for i, (x0,x1) in enumerate(intervals):
1986
            s += polygon([(x0,0), (x0,counts[i]), (x1,counts[i]), (x1,0)], **kwds)
1987
        if len(intervals) > 0:
1988
            s.axes_range(ymin=0, ymax=max(counts), xmin=intervals[0][0], xmax=intervals[-1][1])
1989
        return s
1990

1991
    def plot_candlestick(self, int bins=30):
1992
        """
1993
        Return a candlestick plot of this time series with the given number
1994
        of bins.
1995

1996
        A candlestick plot is a style of bar-chart used to view open, high,
1997
        low, and close stock data. At each bin, the line represents the
1998
        high / low range. The bar represents the open / close range. The
1999
        interval is colored blue if the open for that bin is less than the
2000
        close. If the close is less than the open, then that bin is colored
2001
        red instead.
2002

2003
        INPUT:
2004

2005
        - ``bins`` -- positive integer (default: 30), the number of bins
2006
          or candles.
2007

2008
        OUTPUT:
2009

2010
        A candlestick plot.
2011

2012
        EXAMPLES:
2013

2014
        Here we look at the candlestick plot for Brownian motion::
2015

2016
            sage: v = finance.TimeSeries(1000).randomize()
2017
            sage: v.plot_candlestick(bins=20)
2018
        """
2019
        from sage.plot.all import line, polygon, Graphics
2020

2021
        cdef TimeSeries t = new_time_series(self._length)
2022
        cdef TimeSeries s
2023
        cdef int i, j, n
2024
        bin_size = int(t._length/bins)
2025
        rng      = t._length - bin_size
2026
        
2027
        memcpy(t._values, self._values, sizeof(double)*self._length)
2028
        p = Graphics()
2029

2030
        for i from 0 <= i < bins:
2031
            n = i*bin_size
2032
            s = new_time_series(bin_size)
2033
            for j from 0 <= j < bin_size:
2034
                s._values[j] = t._values[n + j]
2035
            low   = s.min()
2036
            high  = s.max()
2037
            open  = s._values[0]
2038
            close = s._values[bin_size-1]
2039
            left  = n + bin_size/3
2040
            mid   = n + bin_size/2
2041
            right = n + 2*bin_size/3
2042

2043
            rgbcolor =  'blue' if open < close else 'red'
2044

2045
            p += line([(mid, low), (mid, high)], rgbcolor=rgbcolor)
2046
            p += polygon([(left, open), (right, open), (right, close), (left, close)], rgbcolor=rgbcolor)
2047

2048
        return p
2049

2050
    def numpy(self, copy=True):
2051
        """
2052
        Return a NumPy version of this time series.
2053
        
2054
        .. NOTE::
2055

2056
            If copy is ``False``, return a NumPy 1-D array reference to
2057
            exactly the same block of memory as this time series.  This is
2058
            very, very fast and makes it easy to quickly use all
2059
            NumPy/SciPy functionality on ``self``.  However, it is dangerous
2060
            because when this time series goes out of scope and is garbage
2061
            collected, the corresponding NumPy reference object will point
2062
            to garbage.
2063

2064
        INPUT:
2065

2066
        - ``copy`` -- bool (default: ``True``)
2067

2068
        OUTPUT:
2069

2070
        A numpy 1-D array.
2071

2072
        EXAMPLES::
2073

2074
            sage: v = finance.TimeSeries([1,-3,4.5,-2])
2075
            sage: w = v.numpy(copy=False); w
2076
            array([ 1. , -3. ,  4.5, -2. ])
2077
            sage: type(w)
2078
            <type 'numpy.ndarray'>
2079
            sage: w.shape
2080
            (4,)
2081

2082
        Notice that changing ``w`` also changes ``v`` too! ::
2083

2084
            sage: w[0] = 20
2085
            sage: w
2086
            array([ 20. ,  -3. ,   4.5,  -2. ])
2087
            sage: v
2088
            [20.0000, -3.0000, 4.5000, -2.0000]
2089

2090
        If you want a separate copy do not give the ``copy=False`` option. ::
2091

2092
            sage: z = v.numpy(); z
2093
            array([ 20. ,  -3. ,   4.5,  -2. ])
2094
            sage: z[0] = -10
2095
            sage: v
2096
            [20.0000, -3.0000, 4.5000, -2.0000]
2097
        """
2098
        cnumpy.import_array() #This must be called before using the numpy C/api or you will get segfault
2099
        cdef cnumpy.npy_intp dims[1]
2100
        dims[0] = self._length
2101
        cdef cnumpy.ndarray n = cnumpy.PyArray_SimpleNewFromData(1, dims, cnumpy.NPY_DOUBLE, self._values)
2102
        if copy:
2103
            return n.copy()
2104
        else:
2105
#            Py_INCREF(self)
2106
#            n.base = self
2107
            return n
2108

2109
    def randomize(self, distribution='uniform', loc=0, scale=1, **kwds):
2110
        r"""
2111
        Randomize the entries in this time series, and return a reference
2112
        to ``self``.  Thus this function both changes ``self`` in place, and
2113
        returns a copy of ``self``, for convenience. 
2114
        
2115
        INPUT:
2116

2117
        - ``distribution`` -- (default: ``"uniform"``); supported values are:
2118

2119
          - ``'uniform'`` -- from ``loc`` to ``loc + scale``
2120

2121
          - ``'normal'`` -- mean ``loc`` and standard deviation ``scale``
2122

2123
          - ``'semicircle'`` -- with center at ``loc`` (``scale`` is ignored)
2124

2125
          - ``'lognormal'`` -- mean ``loc`` and standard deviation ``scale``
2126

2127
        - ``loc`` -- float (default: 0)
2128

2129
        - ``scale`` -- float (default: 1)
2130

2131
        .. NOTE::
2132

2133
            All random numbers are generated using algorithms that
2134
            build on the high quality GMP random number function
2135
            gmp_urandomb_ui.  Thus this function fully respects the Sage
2136
            ``set_random_state`` command.  It's not quite as fast as the C
2137
            library random number generator, but is of course much better
2138
            quality, and is platform independent.
2139

2140
        EXAMPLES:
2141

2142
        We generate 5 uniform random numbers in the interval [0,1]::
2143

2144
            sage: set_random_seed(0)
2145
            sage: finance.TimeSeries(5).randomize()
2146
            [0.8685, 0.2816, 0.0229, 0.1456, 0.7314]
2147

2148
        We generate 5 uniform random numbers from 5 to `5+2=7`::
2149

2150
            sage: set_random_seed(0)
2151
            sage: finance.TimeSeries(10).randomize('uniform', 5, 2)
2152
            [6.7369, 5.5632, 5.0459, 5.2911, 6.4628, 5.2412, 5.2010, 5.2761, 5.5813, 5.5439]
2153

2154
        We generate 5 normal random values with mean 0 and variance 1. ::
2155

2156
            sage: set_random_seed(0)
2157
            sage: finance.TimeSeries(5).randomize('normal')
2158
            [0.6767, -0.4011, 0.3576, -0.5864, -0.9365]
2159

2160
        We generate 10 normal random values with mean 5 and variance 2. ::
2161

2162
            sage: set_random_seed(0)
2163
            sage: finance.TimeSeries(10).randomize('normal', 5, 2)
2164
            [6.3534, 4.1978, 5.7153, 3.8273, 3.1269, 2.9598, 3.7484, 6.7472, 3.8986, 4.6271]
2165

2166
        We generate 5 values using the semicircle distribution. ::
2167

2168
            sage: set_random_seed(0)
2169
            sage: finance.TimeSeries(5).randomize('semicircle')
2170
            [0.7369, -0.9541, 0.4628, -0.7990, -0.4187]
2171

2172
        We generate 1 million normal random values and create a frequency
2173
        histogram. ::
2174

2175
            sage: set_random_seed(0)
2176
            sage: a = finance.TimeSeries(10^6).randomize('normal')
2177
            sage: a.histogram(10)[0]
2178
            [36, 1148, 19604, 130699, 340054, 347870, 137953, 21290, 1311, 35]
2179

2180
        We take the above values, and compute the proportion that lie within
2181
        1, 2, 3, 5, and 6 standard deviations of the mean (0)::
2182

2183
            sage: s = a.standard_deviation()
2184
            sage: len(a.clip_remove(-s,s))/float(len(a))
2185
            0.683094
2186
            sage: len(a.clip_remove(-2*s,2*s))/float(len(a))
2187
            0.954559
2188
            sage: len(a.clip_remove(-3*s,3*s))/float(len(a))
2189
            0.997228
2190
            sage: len(a.clip_remove(-5*s,5*s))/float(len(a))
2191
            0.999998
2192

2193
        There were no "six sigma events"::
2194

2195
            sage: len(a.clip_remove(-6*s,6*s))/float(len(a))
2196
            1.0
2197
        """
2198
        if distribution == 'uniform':
2199
            self._randomize_uniform(loc, loc + scale)
2200
        elif distribution == 'normal':
2201
            self._randomize_normal(loc, scale)
2202
        elif distribution == 'semicircle':
2203
            self._randomize_semicircle(loc)
2204
        elif distribution == 'lognormal':
2205
            self._randomize_lognormal(loc, scale)
2206
        else:
2207
            raise NotImplementedError
2208
        return self
2209

2210
    def _randomize_uniform(self, double left, double right):
2211
        """
2212
        Generates a uniform random distribution of doubles between ``left`` and
2213
        ``right`` and stores values in place.
2214

2215
        INPUT:
2216

2217
        - ``left`` -- left bound on random distribution.
2218

2219
        - ``right`` -- right bound on random distribution.
2220

2221
        EXAMPLES:
2222

2223
        We generate 5 values distributed with respect to the uniform
2224
        distribution over the interval [0,1]::
2225

2226
            sage: v = finance.TimeSeries(5)
2227
            sage: set_random_seed(0)
2228
            sage: v.randomize('uniform')
2229
            [0.8685, 0.2816, 0.0229, 0.1456, 0.7314]
2230

2231
        We now test that the mean is indeed 0.5::
2232

2233
            sage: v = finance.TimeSeries(10^6)
2234
            sage: set_random_seed(0)
2235
            sage: v.randomize('uniform').mean()
2236
            0.50069085...
2237
        """
2238
        if left >= right:
2239
            raise ValueError, "left must be less than right"
2240

2241
        cdef randstate rstate = current_randstate()
2242
        cdef Py_ssize_t k
2243
        cdef double d = right - left
2244
        for k from 0 <= k < self._length:
2245
            self._values[k] = rstate.c_rand_double() * d + left
2246

2247
    def _randomize_normal(self, double m, double s):
2248
        """
2249
        Generates a normal random distribution of doubles with mean ``m`` and
2250
        standard deviation ``s`` and stores values in place. Uses the
2251
        Box-Muller algorithm.
2252
        
2253
        INPUT:
2254

2255
        - ``m`` -- mean
2256

2257
        - ``s`` -- standard deviation
2258

2259
        EXAMPLES:
2260

2261
        We generate 5 values distributed with respect to the normal
2262
        distribution with mean 0 and standard deviation 1::
2263

2264
            sage: set_random_seed(0)
2265
            sage: v = finance.TimeSeries(5)
2266
            sage: v.randomize('normal')
2267
            [0.6767, -0.4011, 0.3576, -0.5864, -0.9365]
2268

2269
        We now test that the mean is indeed 0::
2270

2271
            sage: set_random_seed(0)
2272
            sage: v = finance.TimeSeries(10^6)
2273
            sage: v.randomize('normal').mean()
2274
            6.2705472723...
2275

2276
        The same test with mean equal to 2 and standard deviation equal
2277
        to 5::
2278

2279
            sage: set_random_seed(0)
2280
            sage: v = finance.TimeSeries(10^6)
2281
            sage: v.randomize('normal', 2, 5).mean()
2282
            2.0003135273...
2283
        """
2284
        # Ported from http://users.tkk.fi/~nbeijar/soft/terrain/source_o2/boxmuller.c
2285
        # This the box muller algorithm.
2286
        cdef randstate rstate = current_randstate()
2287

2288
        cdef double x1, x2, w, y1, y2
2289
        cdef Py_ssize_t k
2290
        for k from 0 <= k < self._length:
2291
            while 1:
2292
                x1 = 2*rstate.c_rand_double() - 1
2293
                x2 = 2*rstate.c_rand_double() - 1
2294
                w = x1*x1 + x2*x2
2295
                if w < 1: break
2296
            w = sqrt( (-2*log(w))/w )
2297
            y1 = x1 * w
2298
            y2 = x2 * w
2299
            self._values[k] = m + y1*s
2300
            k += 1
2301
            if k < self._length:
2302
                self._values[k] = m + y2*s
2303

2304
    def _randomize_semicircle(self, double center):
2305
        """
2306
        Generates a semicircle random distribution of doubles about center
2307
        and stores values in place. Uses the acceptance-rejection method.
2308

2309
        INPUT:
2310

2311
        - ``center`` -- the center of the semicircle distribution.
2312

2313
        EXAMPLES:
2314

2315
        We generate 5 values distributed with respect to the semicircle
2316
        distribution located at center::
2317

2318
            sage: v = finance.TimeSeries(5)
2319
            sage: set_random_seed(0)
2320
            sage: v.randomize('semicircle')
2321
            [0.7369, -0.9541, 0.4628, -0.7990, -0.4187]
2322

2323
        We now test that the mean is indeed the center::
2324

2325
            sage: v = finance.TimeSeries(10^6)
2326
            sage: set_random_seed(0)
2327
            sage: v.randomize('semicircle').mean()
2328
            0.0007207497...
2329

2330
        The same test with center equal to 2::
2331

2332
            sage: v = finance.TimeSeries(10^6)
2333
            sage: set_random_seed(0)
2334
            sage: v.randomize('semicircle', 2).mean()
2335
            2.0007207497...
2336
        """
2337
        cdef Py_ssize_t k
2338
        cdef double x, y, s, d = 2, left = center - 1, z
2339
        cdef randstate rstate = current_randstate()
2340
        z = d*d
2341
        s = 1.5707963267948966192  # pi/2
2342
        for k from 0 <= k < self._length:
2343
            while 1:
2344
                x = rstate.c_rand_double() * d - 1
2345
                y = rstate.c_rand_double() * s
2346
                if y*y + x*x < 1:
2347
                    break
2348
            self._values[k] = x + center
2349

2350
    def _randomize_lognormal(self, double m, double s):
2351
        r"""
2352
        Generates a log-normal random distribution of doubles with mean ``m``
2353
        and standard deviation ``s``. Uses Box-Muller algorithm and the
2354
        identity: if `Y` is a random variable with normal distribution then
2355
        `X = \exp(Y)` is a random variable with log-normal distribution.
2356

2357
        INPUT:
2358

2359
        - ``m`` -- mean
2360

2361
        - ``s`` -- standard deviation
2362

2363
        EXAMPLES:
2364

2365
        We generate 5 values distributed with respect to the lognormal
2366
        distribution with mean 0 and standard deviation 1::
2367

2368
            sage: set_random_seed(0)
2369
            sage: v = finance.TimeSeries(5)
2370
            sage: v.randomize('lognormal')
2371
            [1.9674, 0.6696, 1.4299, 0.5563, 0.3920]
2372

2373
        We now test that the mean is indeed `\sqrt{e}`::
2374

2375
            sage: set_random_seed(0)
2376
            sage: v = finance.TimeSeries(10^6)
2377
            sage: v.randomize('lognormal').mean()
2378
            1.647351973...
2379
            sage: exp(0.5)
2380
            1.648721270...
2381
            
2382
        A log-normal distribution can be simply thought of as the logarithm
2383
        of a normally distributed data set. We test that here by generating
2384
        5 values distributed with respect to the normal distribution with mean
2385
        0 and standard deviation 1::
2386

2387
            sage: set_random_seed(0)
2388
            sage: w = finance.TimeSeries(5)
2389
            sage: w.randomize('normal')
2390
            [0.6767, -0.4011, 0.3576, -0.5864, -0.9365]
2391
            sage: exp(w)
2392
            [1.9674, 0.6696, 1.4299, 0.5563, 0.3920]
2393
        """
2394
        # Ported from http://users.tkk.fi/~nbeijar/soft/terrain/source_o2/boxmuller.c
2395
        # This the box muller algorithm.
2396
        cdef randstate rstate = current_randstate()
2397
        cdef double x1, x2, w, y1, y2
2398
        cdef Py_ssize_t k
2399
        for k from 0 <= k < self._length:
2400
            while 1:
2401
                x1 = 2*rstate.c_rand_double() - 1
2402
                x2 = 2*rstate.c_rand_double() - 1
2403
                w = x1*x1 + x2*x2
2404
                if w < 1: break
2405
            w = sqrt( (-2*log(w))/w )
2406
            y1 = x1 * w
2407
            y2 = x2 * w
2408
            self._values[k] = exp(m + y1*s)
2409
            k += 1
2410
            if k < self._length:
2411
                self._values[k] = exp(m + y2*s)
2412
        
2413
    def fft(self, bint overwrite=False):
2414
        r"""
2415
        Return the real discrete fast Fourier transform of ``self``, as a
2416
        real time series:
2417

2418
        .. MATH::
2419

2420
            [y(0),\Re(y(1)),\Im(y(1)),\dots,\Re(y(n/2))]  \text{ if $n$ is even}
2421
          
2422
            [y(0),\Re(y(1)),\Im(y(1)),\dots,\Re(y(n/2)),\Im(y(n/2))] \text{ if $n$ is odd}
2423
          
2424
        where
2425

2426
        .. MATH::
2427

2428
            y(j) = \sum_{k=0}^{n-1} x[k] \cdot \exp(-\sqrt{-1} \cdot jk \cdot 2\pi/n)
2429

2430
        for `j = 0,\dots,n-1`.  Note that `y(-j) = y(n-j)`.
2431
    
2432
        EXAMPLES::
2433

2434
            sage: v = finance.TimeSeries([1..9]); v
2435
            [1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000]
2436
            sage: w = v.fft(); w
2437
            [45.0000, -4.5000, 12.3636, -4.5000, 5.3629, -4.5000, 2.5981, -4.5000, 0.7935]
2438

2439
        We get just the series of real parts of ::
2440

2441
            sage: finance.TimeSeries([w[0]]) + w[1:].scale_time(2)
2442
            [45.0000, -4.5000, -4.5000, -4.5000, -4.5000]
2443

2444
        An example with an even number of terms::
2445

2446
            sage: v = finance.TimeSeries([1..10]); v
2447
            [1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000]
2448
            sage: w = v.fft(); w
2449
            [55.0000, -5.0000, 15.3884, -5.0000, 6.8819, -5.0000, 3.6327, -5.0000, 1.6246, -5.0000]
2450
            sage: v.fft().ifft()
2451
            [1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000]
2452
        """
2453
        import scipy.fftpack
2454
        if overwrite:
2455
            y = self
2456
        else:
2457
            y = self.__copy__()
2458
        w = y.numpy(copy=False)
2459
        scipy.fftpack.rfft(w, overwrite_x=1)
2460
        return y
2461

2462
    def ifft(self, bint overwrite=False):
2463
        r"""
2464
        Return the real discrete inverse fast Fourier transform of
2465
        ``self``, which is also a real time series.
2466

2467
        This is the inverse of ``fft()``.
2468

2469
        The returned real array contains
2470

2471
        .. MATH::
2472

2473
            [y(0),y(1),\dots,y(n-1)]
2474
                         
2475
        where for `n` is even
2476

2477
        .. MATH::
2478

2479
            y(j) 
2480
            = 
2481
            1/n \left(
2482
            \sum_{k=1}^{n/2-1} 
2483
            (x[2k-1]+\sqrt{-1} \cdot x[2k])
2484
            \cdot \exp(\sqrt{-1} \cdot jk \cdot 2pi/n)
2485
            + c.c. + x[0] + (-1)^j x[n-1]
2486
            \right)
2487
                      
2488
        and for `n` is odd
2489

2490
        .. MATH::
2491

2492
            y(j) 
2493
            = 
2494
            1/n \left(
2495
            \sum_{k=1}^{(n-1)/2} 
2496
            (x[2k-1]+\sqrt{-1} \cdot x[2k])
2497
            \cdot \exp(\sqrt{-1} \cdot jk \cdot 2pi/n)
2498
            + c.c. + x[0]
2499
            \right)
2500
                      
2501
        where `c.c.` denotes complex conjugate of preceding expression.
2502

2503
        EXAMPLES::
2504

2505
            sage: v = finance.TimeSeries([1..10]); v
2506
            [1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000]
2507
            sage: v.ifft()
2508
            [5.1000, -5.6876, 1.4764, -1.0774, 0.4249, -0.1000, -0.2249, 0.6663, -1.2764, 1.6988]
2509
            sage: v.ifft().fft()
2510
            [1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000]
2511
        """
2512
        import scipy.fftpack
2513
        if overwrite:
2514
            y = self
2515
        else:
2516
            y = self.__copy__()
2517
        w = y.numpy(copy=False)
2518
        scipy.fftpack.irfft(w, overwrite_x=1)
2519
        return y
2520
        
2521

2522
cdef new_time_series(Py_ssize_t length):
2523
    """
2524
    Return a new uninitialized time series of the given length.
2525
    The entries of the time series are garbage.
2526

2527
    INPUT:
2528

2529
    - ``length`` -- integer
2530

2531
    OUTPUT:
2532

2533
    A time series.
2534

2535
    EXAMPLES:
2536

2537
    This uses ``new_time_series`` implicitly::
2538

2539
        sage: v = finance.TimeSeries([1,-3,4.5,-2])
2540
        sage: v.__copy__()
2541
        [1.0000, -3.0000, 4.5000, -2.0000]
2542
    """
2543
    if length < 0:
2544
        raise ValueError, "length must be nonnegative"
2545
    cdef TimeSeries t = PY_NEW(TimeSeries)
2546
    t._length = length
2547
    t._values = <double*> sage_malloc(sizeof(double)*length)
2548
    return t
2549

2550
def unpickle_time_series_v1(v, Py_ssize_t n):
2551
    """
2552
    Version 1 unpickle method.
2553

2554
    INPUT:
2555

2556
    - ``v`` -- a raw char buffer
2557

2558
    EXAMPLES::
2559

2560
        sage: v = finance.TimeSeries([1,2,3])
2561
        sage: s = v.__reduce__()[1][0]
2562
        sage: type(s)
2563
        <type 'str'>
2564
        sage: sage.finance.time_series.unpickle_time_series_v1(s,3)
2565
        [1.0000, 2.0000, 3.0000]
2566
        sage: sage.finance.time_series.unpickle_time_series_v1(s+s,6)
2567
        [1.0000, 2.0000, 3.0000, 1.0000, 2.0000, 3.0000]
2568
        sage: sage.finance.time_series.unpickle_time_series_v1('',0)
2569
        []
2570
    """
2571
    cdef TimeSeries t = new_time_series(n)
2572
    memcpy(t._values, PyString_AsString(v), n*sizeof(double))
2573
    return t
2574

2575

2576

2577

2578
def autoregressive_fit(acvs):
2579
    r"""
2580
    Given a sequence of lagged autocovariances of length `M` produce
2581
    `a_1,\dots,a_p` so that the first `M` autocovariance coefficients
2582
    of the autoregressive processes `X_t=a_1X_{t_1}+\cdots+a_pX_{t-p}+Z_t`
2583
    are the same as the input sequence.
2584
    
2585
    The function works by solving the Yule-Walker equations
2586
    `\Gamma a =\gamma`, where `\gamma=(\gamma(1),\dots,\gamma(M))`,
2587
    `a=(a_1,\dots,a_M)`, with `\gamma(i)` the autocovariance of lag `i`
2588
    and `\Gamma_{ij}=\gamma(i-j)`.
2589
    
2590
    EXAMPLES:
2591

2592
    In this example we consider the multifractal cascade random walk
2593
    of length 1000, and use simulations to estimate the
2594
    expected first few autocovariance parameters for this model, then
2595
    use them to construct a linear filter that works vastly better
2596
    than a linear filter constructed from the same data but not using
2597
    this model. The Monte-Carlo method illustrated below should work for
2598
    predicting one "time step" into the future for any
2599
    model that can be simulated.  To predict k time steps into the
2600
    future would require using a similar technique but would require
2601
    scaling time by k.
2602

2603
    We create 100 simulations of a multifractal random walk.  This
2604
    models the logarithms of a stock price sequence. ::
2605

2606
        sage: set_random_seed(0)
2607
        sage: y = finance.multifractal_cascade_random_walk_simulation(3700,0.02,0.01,0.01,1000,100)
2608

2609
    For each walk below we replace the walk by the walk but where each
2610
    step size is replaced by its absolute value -- this is what we
2611
    expect to be able to predict given the model, which is only a
2612
    model for predicting volatility.  We compute the first 200
2613
    autocovariance values for every random walk::
2614

2615
        sage: c = [[a.diffs().abs().sums().autocovariance(i) for a in y] for i in range(200)]
2616

2617
    We make a time series out of the expected values of the
2618
    autocovariances::
2619

2620
        sage: ac = finance.TimeSeries([finance.TimeSeries(z).mean() for z in c])
2621
        sage: ac
2622
        [3.9962, 3.9842, 3.9722, 3.9601, 3.9481 ... 1.7144, 1.7033, 1.6922, 1.6812, 1.6701]
2623

2624
    .. NOTE::
2625

2626
        ``ac`` looks like a line -- one could best fit it to yield a lot
2627
        more approximate autocovariances.
2628

2629
    We compute the autoregression coefficients matching the above
2630
    autocovariances::
2631

2632
        sage: F = finance.autoregressive_fit(ac); F
2633
        [0.9982, -0.0002, -0.0002, 0.0003, 0.0001 ... 0.0002, -0.0002, -0.0000, -0.0002, -0.0014]
2634

2635
    Note that the sum is close to 1::
2636

2637
        sage: sum(F)
2638
        0.99593284089454...
2639

2640
    Now we make up an 'out of sample' sequence::
2641

2642
        sage: y2 = finance.multifractal_cascade_random_walk_simulation(3700,0.02,0.01,0.01,1000,1)[0].diffs().abs().sums()
2643
        sage: y2
2644
        [0.0013, 0.0059, 0.0066, 0.0068, 0.0184 ... 6.8004, 6.8009, 6.8063, 6.8090, 6.8339]
2645

2646
    And we forecast the very last value using our linear filter; the forecast
2647
    is close::
2648

2649
        sage: y2[:-1].autoregressive_forecast(F)
2650
        6.7836741372407...
2651

2652
    In fact it is closer than we would get by forecasting using a
2653
    linear filter made from all the autocovariances of our sequence::
2654

2655
        sage: y2[:-1].autoregressive_forecast(y2[:-1].autoregressive_fit(len(y2)))
2656
        6.770168705668...
2657

2658
    We record the last 20 forecasts, always using all correct values up to the
2659
    one we are forecasting::
2660

2661
        sage: s1 = sum([(y2[:-i].autoregressive_forecast(F)-y2[-i])^2 for i in range(1,20)])
2662

2663
    We do the same, but using the autocovariances of the sample sequence::
2664

2665
        sage: F2 = y2[:-100].autoregressive_fit(len(F))
2666
        sage: s2 = sum([(y2[:-i].autoregressive_forecast(F2)-y2[-i])^2 for i in range(1,20)])
2667

2668
    Our model gives us something that is 15 percent better in this case::
2669

2670
        sage: s2/s1
2671
        1.15464636102...
2672

2673
    How does it compare overall?  To find out we do 100 simulations
2674
    and for each we compute the percent that our model beats naively
2675
    using the autocovariances of the sample::
2676

2677
        sage: y_out = finance.multifractal_cascade_random_walk_simulation(3700,0.02,0.01,0.01,1000,100)
2678
        sage: s1 = []; s2 = []
2679
        sage: for v in y_out:
2680
        ...       s1.append(sum([(v[:-i].autoregressive_forecast(F)-v[-i])^2 for i in range(1,20)]))
2681
        ...       F2 = v[:-len(F)].autoregressive_fit(len(F))
2682
        ...       s2.append(sum([(v[:-i].autoregressive_forecast(F2)-v[-i])^2 for i in range(1,20)]))
2683
        ...
2684

2685
    We find that overall the model beats naive linear forecasting by 35
2686
    percent! ::
2687

2688
        sage: s = finance.TimeSeries([s2[i]/s1[i] for i in range(len(s1))])
2689
        sage: s.mean()
2690
        1.354073591877...
2691
    """
2692
    cdef TimeSeries c
2693
    cdef Py_ssize_t i
2694

2695
    M = len(acvs)-1
2696

2697
    if M <= 0:
2698
        raise ValueError, "M must be positive"
2699

2700
    if not isinstance(acvs, TimeSeries):
2701
        c = TimeSeries(acvs)
2702
    else:
2703
        c = acvs
2704

2705
    # Also create the numpy vector v with entries c(1), ..., c(M).
2706
    v = c[1:].numpy()
2707

2708
    # 2. Create the autocovariances numpy 2d array A whose i,j entry
2709
    # is c(|i-j|).
2710
    import numpy
2711
    A = numpy.array([[c[abs(j-k)] for j in range(M)] for k in range(M)])
2712

2713
    # 3. Solve the equation A * x = v
2714
    x = numpy.linalg.solve(A, v)
2715

2716
    # 4. The entries of x give the linear filter coefficients.
2717
    return TimeSeries(x)
2718

2719
Product

Resources

Company