Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sagesmc
Path: blob/master/src/sage/stats/hmm/chmm.pyx
8818 views
1
"""
2
Continuous Emission Hidden Markov Models
3
4
AUTHOR:
5
6
- William Stein, 2010-03
7
"""
8
9
#############################################################################
10
# Copyright (C) 2010 William Stein <[email protected]>
11
# Distributed under the terms of the GNU General Public License (GPL)
12
# The full text of the GPL is available at:
13
# http://www.gnu.org/licenses/
14
#############################################################################
15
16
include "sage/ext/stdsage.pxi"
17
include "sage/ext/cdefs.pxi"
18
include "sage/ext/interrupt.pxi"
19
20
cdef extern from "math.h":
21
double log(double)
22
double sqrt(double)
23
double exp(double)
24
int isnormal(double)
25
int isfinite(double)
26
27
import math
28
29
from sage.misc.flatten import flatten
30
from sage.matrix.matrix import is_Matrix
31
32
cdef double sqrt2pi = sqrt(2*math.pi)
33
34
from sage.finance.time_series cimport TimeSeries
35
from sage.stats.intlist cimport IntList
36
37
from hmm cimport HiddenMarkovModel
38
from util cimport HMM_Util
39
from distributions cimport GaussianMixtureDistribution
40
41
cdef HMM_Util util = HMM_Util()
42
43
from sage.misc.randstate cimport current_randstate, randstate
44
45
46
# TODO: DELETE THIS FUNCTION WHEN MOVE Gaussian stuff to distributions.pyx!!! (next version)
cdef double random_normal(double mean, double std, randstate rstate):
    """
    Return a number chosen randomly with given mean and standard deviation.

    INPUT:

    - ``mean`` -- double
    - ``std`` -- double, standard deviation
    - ``rstate`` -- a randstate object

    OUTPUT:

    - a double
    """
    # Ported from http://users.tkk.fi/~nbeijar/soft/terrain/source_o2/boxmuller.c
    # This is the polar (Marsaglia) form of the Box-Muller algorithm.
    # Client code can get the current random state from:
    #   cdef randstate rstate = current_randstate()

    # Rejection-sample a point strictly inside the unit disc, excluding
    # the origin: accepting w == 0 (possible when both uniforms hit 0.5
    # exactly) would make log(w) = -inf and divide by zero below.
    cdef double x1, x2, w, y1
    while True:
        x1 = 2*rstate.c_rand_double() - 1
        x2 = 2*rstate.c_rand_double() - 1
        w = x1*x1 + x2*x2
        if 0 < w < 1: break
    # Transform the accepted point into a standard normal deviate.
    w = sqrt( (-2*log(w))/w )
    y1 = x1 * w
    return mean + y1*std
cdef class GaussianHiddenMarkovModel(HiddenMarkovModel):
77
"""
78
GaussianHiddenMarkovModel(A, B, pi)
79
80
Gaussian emissions Hidden Markov Model.
81
82
INPUT:
83
84
- ``A`` -- matrix; the N x N transition matrix
85
- ``B`` -- list of pairs (mu,sigma) that define the distributions
86
- ``pi`` -- initial state probabilities
87
- ``normalize`` --bool (default: True)
88
89
EXAMPLES:
90
91
We illustrate the primary functions with an example 2-state Gaussian HMM::
92
93
sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,1), (-1,1)], [.5,.5]); m
94
Gaussian Hidden Markov Model with 2 States
95
Transition matrix:
96
[0.1 0.9]
97
[0.5 0.5]
98
Emission parameters:
99
[(1.0, 1.0), (-1.0, 1.0)]
100
Initial probabilities: [0.5000, 0.5000]
101
102
We query the defining transition matrix, emission parameters, and
103
initial state probabilities::
104
105
sage: m.transition_matrix()
106
[0.1 0.9]
107
[0.5 0.5]
108
sage: m.emission_parameters()
109
[(1.0, 1.0), (-1.0, 1.0)]
110
sage: m.initial_probabilities()
111
[0.5000, 0.5000]
112
113
We obtain a sample sequence with 10 entries in it, and compute the
114
logarithm of the probability of obtaining his sequence, given the
115
model::
116
117
sage: obs = m.sample(10); obs
118
[-1.6835, 0.0635, -2.1688, 0.3043, -0.3188, -0.7835, 1.0398, -1.3558, 1.0882, 0.4050]
119
sage: m.log_likelihood(obs)
120
-15.2262338077988...
121
122
We compute the Viterbi path, and probability that the given path
123
of states produced obs::
124
125
sage: m.viterbi(obs)
126
([1, 0, 1, 0, 1, 1, 0, 1, 0, 1], -16.67738270170788)
127
128
We use the Baum-Welch iterative algorithm to find another model
129
for which our observation sequence is more likely::
130
131
sage: m.baum_welch(obs)
132
(-10.6103334957397..., 14)
133
sage: m.log_likelihood(obs)
134
-10.6103334957397...
135
136
Notice that running Baum-Welch changed our model::
137
138
sage: m
139
Gaussian Hidden Markov Model with 2 States
140
Transition matrix:
141
[ 0.415498136619 0.584501863381]
142
[ 0.999999317425 6.82574625899e-07]
143
Emission parameters:
144
[(0.417888242712, 0.517310966436), (-1.50252086313, 0.508551283606)]
145
Initial probabilities: [0.0000, 1.0000]
146
"""
147
cdef TimeSeries B, prob
148
cdef int n_out
149
150
    def __init__(self, A, B, pi, bint normalize=True):
        """
        Create a Gaussian emissions HMM with transition probability
        matrix A, normal emissions given by B, and initial state
        probability distribution pi.

        INPUT::

        - A -- a list of lists or a square N x N matrix, whose
          (i,j) entry gives the probability of transitioning from
          state i to state j.

        - B -- a list of N pairs (mu,std), where if B[i]=(mu,std),
          then the probability distribution associated with state i
          normal with mean mu and standard deviation std.

        - pi -- the probabilities of starting in each initial
          state, i.e,. pi[i] is the probability of starting in
          state i.

        - normalize --bool (default: True); if given, input is
          normalized to define valid probability distributions,
          e.g., the entries of A are made nonnegative and the rows
          sum to 1.

        EXAMPLES::

            sage: hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,1), (-1,1)], [.5,.5])
            Gaussian Hidden Markov Model with 2 States
            Transition matrix:
            [0.1 0.9]
            [0.5 0.5]
            Emission parameters:
            [(1.0, 1.0), (-1.0, 1.0)]
            Initial probabilities: [0.5000, 0.5000]

        We input a model in which both A and pi have to be
        renormalized to define valid probability distributions::

            sage: hmm.GaussianHiddenMarkovModel([[-1,.7],[.3,.4]], [(1,1), (-1,1)], [-1,.3])
            Gaussian Hidden Markov Model with 2 States
            Transition matrix:
            [           0.0            1.0]
            [0.428571428571 0.571428571429]
            Emission parameters:
            [(1.0, 1.0), (-1.0, 1.0)]
            Initial probabilities: [0.0000, 1.0000]

        Bad things can happen::

            sage: hmm.GaussianHiddenMarkovModel([[-1,.7],[.3,.4]], [(1,1), (-1,1)], [-1,.3], normalize=False)
            Gaussian Hidden Markov Model with 2 States
            Transition matrix:
            [-1.0  0.7]
            [ 0.3  0.4]
            ...
        """
        # Normalize/convert pi and A into flat TimeSeries storage; N is
        # inferred from the length of pi.
        self.pi = util.initial_probs_to_TimeSeries(pi, normalize)
        self.N = len(self.pi)
        self.A = util.state_matrix_to_TimeSeries(A, self.N, normalize)

        # B should be a matrix of N rows, with column 0 the mean and 1
        # the standard deviation.  Either way it is flattened into a
        # length-2N TimeSeries: [mu_0, std_0, mu_1, std_1, ...].
        if is_Matrix(B):
            B = B.list()
        else:
            B = flatten(B)
        self.B = TimeSeries(B)
        # Precompute the cached normalization/exponent coefficients used
        # by probability_of().
        self.probability_init()
    def __cmp__(self, other):
        """
        Compare self and other, which must both be GaussianHiddenMarkovModel's.

        Comparison is done on the full pickling data (A, B, pi, prob,
        n_out), so two models compare equal exactly when all their
        defining data agree.

        EXAMPLES::

            sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
            sage: n = hmm.GaussianHiddenMarkovModel([[1]], [(1,1)], [1])
            sage: m < n
            True
            sage: m == m
            True
            sage: n > m
            True
            sage: n < m
            False
        """
        if not isinstance(other, GaussianHiddenMarkovModel):
            raise ValueError
        # __reduce__()[1] is the tuple of defining data; compare those
        # tuples lexicographically (Python 2 cmp).
        return cmp(self.__reduce__()[1], other.__reduce__()[1])
def __getitem__(self, Py_ssize_t i):
243
"""
244
Return the mean and standard distribution for the i-th state.
245
246
INPUT:
247
248
- i -- integer
249
250
OUTPUT:
251
252
- 2 floats
253
254
EXAMPLES::
255
256
sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-2,.3)], [.5,.5])
257
sage: m[0]
258
(1.0, 0.5)
259
sage: m[1]
260
(-2.0, 0.3)
261
sage: m[-1]
262
(-2.0, 0.3)
263
sage: m[3]
264
Traceback (most recent call last):
265
...
266
IndexError: index out of range
267
sage: m[-3]
268
Traceback (most recent call last):
269
...
270
IndexError: index out of range
271
"""
272
if i < 0:
273
i += self.N
274
if i < 0 or i >= self.N:
275
raise IndexError, 'index out of range'
276
277
# TODO: change to be a normal distribution class (next version)
278
return self.B[2*i], self.B[2*i+1]
279
280
def __reduce__(self):
281
"""
282
Used in pickling.
283
284
EXAMPLES::
285
286
sage: G = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
287
sage: loads(dumps(G)) == G
288
True
289
"""
290
return unpickle_gaussian_hmm_v1, \
291
(self.A, self.B, self.pi, self.prob, self.n_out)
292
293
def emission_parameters(self):
294
"""
295
Return the parameters that define the normal distributions
296
associated to all of the states.
297
298
OUTPUT:
299
300
- a list B of pairs B[i] = (mu, std), such that the
301
distribution associated to state i is normal with mean
302
mu and standard deviation std.
303
304
EXAMPLES::
305
306
sage: hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9]).emission_parameters()
307
[(1.0, 0.5), (-1.0, 3.0)]
308
"""
309
cdef Py_ssize_t i
310
from sage.rings.all import RDF
311
return [(RDF(self.B[2*i]),RDF(self.B[2*i+1])) for i in range(self.N)]
312
313
def __repr__(self):
314
"""
315
Return string representation.
316
317
EXAMPLES::
318
319
sage: hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9]).__repr__()
320
'Gaussian Hidden Markov Model with 2 States\nTransition matrix:\n[0.1 0.9]\n[0.5 0.5]\nEmission parameters:\n[(1.0, 0.5), (-1.0, 3.0)]\nInitial probabilities: [0.1000, 0.9000]'
321
"""
322
s = "Gaussian Hidden Markov Model with %s States"%self.N
323
s += '\nTransition matrix:\n%s'%self.transition_matrix()
324
s += '\nEmission parameters:\n%s'%self.emission_parameters()
325
s += '\nInitial probabilities: %s'%self.initial_probabilities()
326
return s
327
328
329
def generate_sequence(self, Py_ssize_t length, starting_state=None):
330
"""
331
Return a sample of the given length from this HMM.
332
333
INPUT:
334
335
- length -- positive integer
336
- starting_state -- int (or None); if specified then generate
337
a sequence using this model starting with the given state
338
instead of the initial probabilities to determine the
339
starting state.
340
341
OUTPUT:
342
343
- an IntList or list of emission symbols
344
- TimeSeries of emissions
345
346
EXAMPLES::
347
348
sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
349
sage: m.generate_sequence(5)
350
([-3.0505, 0.5317, -4.5065, 0.6521, 1.0435], [1, 0, 1, 0, 1])
351
sage: m.generate_sequence(0)
352
([], [])
353
sage: m.generate_sequence(-1)
354
Traceback (most recent call last):
355
...
356
ValueError: length must be nonnegative
357
358
Example in which the starting state is 0 (see trac 11452)::
359
360
sage: set_random_seed(23); m.generate_sequence(2)
361
([0.6501, -2.0151], [0, 1])
362
363
Force a starting state of 1 even though as we saw above it would be 0::
364
365
sage: set_random_seed(23); m.generate_sequence(2, starting_state=1)
366
([-3.1491, -1.0244], [1, 1])
367
368
Verify numerically that the starting state is 0 with probability about 0.1::
369
370
sage: set_random_seed(0)
371
sage: v = [m.generate_sequence(1)[1][0] for i in range(10^5)]
372
sage: 1.0 * v.count(int(0)) / len(v)
373
0.0998200000000000
374
"""
375
if length < 0:
376
raise ValueError, "length must be nonnegative"
377
378
# Create Integer lists for states and observations
379
cdef IntList states = IntList(length)
380
cdef TimeSeries obs = TimeSeries(length)
381
if length == 0:
382
return states, obs
383
384
# Setup variables, including random state.
385
cdef Py_ssize_t i, j
386
cdef randstate rstate = current_randstate()
387
cdef int q = 0
388
cdef double r, accum
389
390
# Choose the starting state.
391
# See the remark in hmm.pyx about how this should get
392
# replaced by some general fast discrete distribution code.
393
if starting_state is None:
394
r = rstate.c_rand_double()
395
accum = 0
396
for i in range(self.N):
397
if r < self.pi._values[i] + accum:
398
q = i
399
break
400
else:
401
accum += self.pi._values[i]
402
else:
403
q = starting_state
404
if q < 0 or q>= self.N:
405
raise ValueError, "starting state must be between 0 and %s"%(self.N-1)
406
407
states._values[0] = q
408
obs._values[0] = self.random_sample(q, rstate)
409
410
cdef double* row
411
cdef int O
412
sig_on()
413
for i in range(1, length):
414
accum = 0
415
row = self.A._values + q*self.N
416
r = rstate.c_rand_double()
417
for j in range(self.N):
418
if r < row[j] + accum:
419
q = j
420
break
421
else:
422
accum += row[j]
423
states._values[i] = q
424
obs._values[i] = self.random_sample(q, rstate)
425
sig_off()
426
427
return obs, states
428
429
    cdef probability_init(self):
        """
        Used internally to compute caching information that makes
        certain computations in the Baum-Welch algorithm faster. This
        function has no input or output.
        """
        # For each state i, cache the two coefficients of the normal pdf
        # so probability_of() is a single multiply and exp():
        #   prob[2i]   = 1/(sqrt(2*pi)*std)   -- normalization constant
        #   prob[2i+1] = -1/(2*std^2)         -- exponent coefficient
        self.prob = TimeSeries(2*self.N)
        cdef int i
        for i in range(self.N):
            self.prob[2*i] = 1.0/(sqrt2pi*self.B[2*i+1])
            self.prob[2*i+1] = -1.0/(2*self.B[2*i+1]*self.B[2*i+1])
    cdef double random_sample(self, int state, randstate rstate):
        """
        Return a random sample from the normal distribution associated
        to the given state.

        This is only used internally, and no bounds or other error
        checking is done, so calling this improperly can lead to seg
        faults.

        INPUT:

        - state -- integer
        - rstate -- randstate instance

        OUTPUT:

        - double
        """
        # B stores (mean, std) pairs flattened per state.
        return random_normal(self.B._values[state*2], self.B._values[state*2+1], rstate)
    cdef double probability_of(self, int state, double observation):
        """
        Return a useful continuous analogue of "the probability b_j(o)"
        of seeing the given observation given that we're in the given
        state j (=state).

        The distribution is a normal distribution, and we're asking
        about the probability of a particular point being observed;
        the probability of a particular point is 0, which is not
        useful. Thus we instead consider the limit p = prob([o,o+d])/d
        as d goes to 0. There is a simple closed form formula for p,
        derived in the source code. Note that p can be bigger than 1;
        for example, if we set observation=mean in the closed formula
        we get p=1/(sqrt(2*pi)*std), so p>1 when std<1/sqrt(2*pi).

        INPUT:

        - state -- integer
        - observation -- double

        OUTPUT:

        - double
        """
        # The code below is an optimized version of the following code:
        #     cdef double mean = self.B._values[2*state], \
        #                 std = self.B._values[2*state+1]
        #     return 1/(sqrt2pi*std) * \
        #            exp(-(observation-mean)*(observation-mean)/(2*std*std))
        #
        # Here is how to use Sage to verify that the above formula computes
        # the limit claimed above:
        #
        #     var('x,d,obs,mean,std')
        #     n = 1/sqrt(2*pi*std^2) * exp(-(x-mean)^2/(2*std^2))
        #     assume(std>0); assume(d>0)
        #     m = n.integrate(x,obs,obs+d)/d
        #     p = SR(m.limit(d=0).simplify_full())
        #     q = 1/(sqrt(2*pi)*std) * exp(-(obs-mean)*(obs-mean)/(2*std*std))
        #     bool(p==q)  # outputs True

        # prob[2*state] and prob[2*state+1] are the coefficients cached
        # by probability_init(); see there for their definitions.
        cdef double x = observation - self.B._values[2*state]  # observation - mean
        return self.prob._values[2*state] * exp(x*x*self.prob._values[2*state+1])
def log_likelihood(self, obs):
506
"""
507
Return the logarithm of a continuous analogue of the
508
probability that this model produced the given observation
509
sequence.
510
511
Note that the "continuous analogue of the probability" above can
512
be bigger than 1, hence the logarithm can be positive.
513
514
INPUT:
515
516
- obs -- sequence of observations
517
518
OUTPUT:
519
520
- float
521
522
EXAMPLES::
523
524
sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
525
sage: m.log_likelihood([1,1,1])
526
-4.297880766072486
527
sage: set_random_seed(0); s = m.sample(20)
528
sage: m.log_likelihood(s)
529
-40.115714129484...
530
"""
531
if len(obs) == 0:
532
return 1.0
533
if not isinstance(obs, TimeSeries):
534
obs = TimeSeries(obs)
535
return self._forward_scale(obs)
536
537
    def _forward_scale(self, TimeSeries obs):
        """
        Memory-efficient implementation of the forward algorithm (with scaling).

        Only the current and previous alpha vectors are kept, so memory
        is O(N) rather than O(N*T).

        INPUT:

        - obs -- an integer list of observation states.

        OUTPUT:

        - float -- the log of the probability that the model
          produced this sequence

        EXAMPLES::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
            sage: m._forward_scale(stats.TimeSeries([1,-1,-1,1]))
            -7.641988207069133
        """
        cdef Py_ssize_t i, j, t, T = len(obs)

        # The running sum of the log probabilities
        cdef double log_probability = 0, sum, a

        cdef TimeSeries alpha = TimeSeries(self.N), \
             alpha2 = TimeSeries(self.N)

        # Initialization: alpha_0(i) = pi_i * b_i(obs[0])
        sum = 0
        for i in range(self.N):
            a = self.pi[i] * self.probability_of(i, obs._values[0])
            alpha[i] = a
            sum += a

        # Accumulate the log of each scaling factor; rescaling keeps the
        # alpha values from underflowing for long sequences.
        log_probability = log(sum)
        alpha.rescale(1/sum)

        # Induction: alpha_t(j) = (sum_i alpha_{t-1}(i) a_{ij}) * b_j(obs[t]),
        # rescaled at every step.
        cdef double s
        for t in range(1, T):
            sum = 0
            for j in range(self.N):
                s = 0
                for i in range(self.N):
                    s += alpha._values[i] * self.A._values[i*self.N + j]
                a = s * self.probability_of(j, obs._values[t])
                alpha2._values[j] = a
                sum += a

            log_probability += log(sum)
            for j in range(self.N):
                alpha._values[j] = alpha2._values[j] / sum

        # Termination: the log probability is the sum of the log scalings.
        return log_probability
    def viterbi(self, obs):
        """
        Determine "the" hidden sequence of states that is most likely
        to produce the given sequence seq of observations, along with
        the probability that this hidden sequence actually produced
        the observation.

        INPUT:

        - seq -- sequence of emitted ints or symbols

        OUTPUT:

        - list -- "the" most probable sequence of hidden states, i.e.,
          the Viterbi path.

        - float -- log of probability that the observed sequence
          was produced by the Viterbi sequence of states.

        EXAMPLES:

        We find the optimal state sequence for a given model::

            sage: m = hmm.GaussianHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [(0,1),(10,1)], [0.5,0.5])
            sage: m.viterbi([0,1,10,10,1])
            ([0, 0, 1, 1, 0], -9.0604285688230...)

        Another example in which the most likely states change based
        on the last observation::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
            sage: m.viterbi([-2,-1,.1,0.1])
            ([1, 1, 0, 1], -9.61823698847639...)
            sage: m.viterbi([-2,-1,.1,0.3])
            ([1, 1, 1, 0], -9.566023653378513)
        """
        cdef TimeSeries _obs
        if not isinstance(obs, TimeSeries):
            _obs = TimeSeries(obs)
        else:
            _obs = obs

        # The algorithm is the same as _viterbi above, except
        # we take the logarithms of everything first, and add
        # instead of multiply.
        cdef Py_ssize_t t, T = _obs._length
        cdef IntList state_sequence = IntList(T)
        if T == 0:
            return state_sequence, 0.0

        cdef int i, j, N = self.N

        # delta[i] is the maximum of the probabilities over all
        # paths ending in state i.
        cdef TimeSeries delta = TimeSeries(N), delta_prev = TimeSeries(N)

        # We view psi as an N x T matrix of ints. The quantity
        #     psi[N*t + j]
        # is a most probable hidden state at time t-1, given
        # that j is a most probable state at time j.
        cdef IntList psi = IntList(N * T)  # initialized to 0 by default

        # Log Preprocessing: work with log A and log pi so path scores
        # are sums rather than products (avoids underflow).
        cdef TimeSeries A = self.A.log()
        cdef TimeSeries pi = self.pi.log()

        # Initialization:
        for i in range(N):
            delta._values[i] = pi._values[i] + log(self.probability_of(i, _obs._values[0]))

        # Recursion:
        cdef double mx, tmp, minus_inf = float('-inf')
        cdef int index

        for t in range(1, T):
            # Reuse the two delta buffers by swapping them each step.
            delta_prev, delta = delta, delta_prev
            for j in range(N):
                # Compute delta_t[j] = max_i(delta_{t-1}(i) a_{i,j}) * b_j(_obs[t])
                mx = minus_inf
                index = -1
                for i in range(N):
                    tmp = delta_prev._values[i] + A._values[i*N+j]
                    if tmp > mx:
                        mx = tmp
                        index = i
                delta._values[j] = mx + log(self.probability_of(j, _obs._values[t]))
                psi._values[N*t + j] = index

        # Termination: best final state and its log score.
        mx, index = delta.max(index=True)

        # Path (state sequence) backtracking:
        state_sequence._values[T-1] = index
        t = T-2
        while t >= 0:
            state_sequence._values[t] = psi._values[N*(t+1) + state_sequence._values[t+1]]
            t -= 1

        return state_sequence, mx
    cdef TimeSeries _backward_scale_all(self, TimeSeries obs, TimeSeries scale):
        """
        This function returns the matrix beta_t(i), and is used
        internally as part of the Baum-Welch algorithm.

        The quantity beta_t(i) is the probability of observing the
        sequence obs[t+1:] assuming that the model is in state i at
        time t.

        INPUT:

        - obs -- TimeSeries
        - scale -- TimeSeries

        OUTPUT:

        - TimeSeries beta such that beta_t(i) = beta[t*N + i]
        - scale is also changed by this function

        .. NOTE::

            The scale factors used here must be the ones produced by
            _forward_scale_all on the same observation sequence, so
            that alpha and beta share consistent scaling.
        """
        cdef Py_ssize_t t, T = obs._length
        cdef int N = self.N, i, j
        cdef double s
        cdef TimeSeries beta = TimeSeries(N*T, initialize=False)

        # 1. Initialization: beta_{T-1}(i) = 1, scaled.
        for i in range(N):
            beta._values[(T-1)*N + i] = 1 / scale._values[T-1]

        # 2. Induction (backwards in time):
        #    beta_t(i) = sum_j a_{ij} b_j(obs[t+1]) beta_{t+1}(j), scaled.
        t = T-2
        while t >= 0:
            for i in range(N):
                s = 0
                for j in range(N):
                    s += self.A._values[i*N+j] * \
                         self.probability_of(j, obs._values[t+1]) * beta._values[(t+1)*N+j]
                beta._values[t*N + i] = s/scale._values[t]
            t -= 1
        return beta
    cdef _forward_scale_all(self, TimeSeries obs):
        """
        Return scaled values alpha_t(i), the sequence of scalings, and
        the log probability.

        The quantity alpha_t(i) is the probability of observing the
        sequence obs[:t+1] assuming that the model is in state i at
        time t.

        INPUT:

        - obs -- TimeSeries

        OUTPUT:

        - TimeSeries alpha with alpha_t(i) = alpha[t*N + i]
        - TimeSeries scale with scale[t] the scaling at step t
        - float -- log_probability of the observation sequence
          being produced by the model.
        """
        cdef Py_ssize_t i, j, t, T = len(obs)
        cdef int N = self.N

        # The running sum of the log probabilities
        cdef double log_probability = 0, sum, a

        cdef TimeSeries alpha = TimeSeries(N*T, initialize=False)
        cdef TimeSeries scale = TimeSeries(T, initialize=False)

        # Initialization: alpha_0(i) = pi_i * b_i(obs[0]), then rescale
        # so the row sums to 1 (scale[0] records the factor).
        sum = 0
        for i in range(self.N):
            a = self.pi._values[i] * self.probability_of(i, obs._values[0])
            alpha._values[0*N + i] = a
            sum += a

        scale._values[0] = sum
        log_probability = log(sum)
        for i in range(self.N):
            alpha._values[0*N + i] /= sum

        # Induction
        # The code below is just an optimized version of:
        #     alpha = (alpha * A).pairwise_product(B[O[t+1]])
        #     alpha = alpha.scale(1/alpha.sum())
        # along with keeping track of the log of the scaling factor.
        cdef double s
        for t in range(1,T):
            sum = 0
            for j in range(N):
                s = 0
                for i in range(N):
                    s += alpha._values[(t-1)*N + i] * self.A._values[i*N + j]
                a = s * self.probability_of(j, obs._values[t])
                alpha._values[t*N + j] = a
                sum += a

            log_probability += log(sum)
            scale._values[t] = sum
            for j in range(self.N):
                alpha._values[t*N + j] /= sum

        # Termination
        return alpha, scale, log_probability
    cdef TimeSeries _baum_welch_xi(self, TimeSeries alpha, TimeSeries beta, TimeSeries obs):
        """
        Used internally to compute the scaled quantity xi_t(i,j)
        appearing in the Baum-Welch reestimation algorithm.

        The quantity xi_t(i,j) is the probability of being in state i
        at time t and state j at time t+1, given the observations.

        INPUT:

        - alpha -- TimeSeries as output by the scaled forward algorithm
        - beta -- TimeSeries as output by the scaled backward algorithm
        - obs -- TimeSeries of observations

        OUTPUT:

        - TimeSeries xi such that xi[t*N*N + i*N + j] = xi_t(i,j).
          Only the entries for t in [0, T-2] are filled in; the slice
          for t = T-1 is left uninitialized and unused by the caller.
        """
        cdef int i, j, N = self.N
        cdef double sum
        cdef Py_ssize_t t, T = alpha._length//N
        cdef TimeSeries xi = TimeSeries(T*N*N, initialize=False)
        for t in range(T-1):
            sum = 0.0
            for i in range(N):
                for j in range(N):
                    # Unnormalized xi_t(i,j) = alpha_t(i) a_{ij} b_j(obs[t+1]) beta_{t+1}(j)
                    xi._values[t*N*N+i*N+j] = alpha._values[t*N+i]*beta._values[(t+1)*N+j]*\
                        self.A._values[i*N+j] * self.probability_of(j, obs._values[t+1])
                    sum += xi._values[t*N*N+i*N+j]
            # Normalize so the N x N slice at time t sums to 1.
            for i in range(N):
                for j in range(N):
                    xi._values[t*N*N+i*N+j] /= sum
        return xi
    def baum_welch(self, obs, int max_iter=500, double log_likelihood_cutoff=1e-4,
                   double min_sd=0.01, bint fix_emissions=False, bint v=False):
        """
        Given an observation sequence obs, improve this HMM using the
        Baum-Welch algorithm to increase the probability of observing obs.

        INPUT:

        - obs -- a time series of emissions

        - max_iter -- integer (default: 500) maximum number
          of Baum-Welch steps to take

        - log_likelihood_cutoff -- positive float (default: 1e-4);
          the minimal improvement in likelihood with respect to
          the last iteration required to continue. Relative value
          to log likelihood.

        - min_sd -- positive float (default: 0.01); when
          reestimating, the standard deviation of emissions is not
          allowed to be less than min_sd.

        - fix_emissions -- bool (default: False); if True, do not
          change emissions when updating

        OUTPUT:

        - changes the model in places, and returns the log
          likelihood and number of iterations.

        EXAMPLES::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
            sage: m.log_likelihood([-2,-1,.1,0.1])
            -8.858282215986275
            sage: m.baum_welch([-2,-1,.1,0.1])
            (4.534646052182..., 7)
            sage: m.log_likelihood([-2,-1,.1,0.1])
            4.534646052182...
            sage: m
            Gaussian Hidden Markov Model with 2 States
            Transition matrix:
            [   0.999999999243 7.56983939444e-10]
            [   0.499984627912    0.500015372088]
            Emission parameters:
            [(0.1, 0.01), (-1.49995081476, 0.50007105049)]
            Initial probabilities: [0.0000, 1.0000]

        We illustrate bounding the standard deviation below. Note that above we had
        different emission parameters when the min_sd was the default of 0.01::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
            sage: m.baum_welch([-2,-1,.1,0.1], min_sd=1)
            (-4.07939572755..., 32)
            sage: m.emission_parameters()
            [(-0.2663018798..., 1.0), (-1.99850979..., 1.0)]

        We watch the log likelihoods of the model converge, step by step::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
            sage: v = m.sample(10)
            sage: stats.TimeSeries([m.baum_welch(v,max_iter=1)[0] for _ in range(len(v))])
            [-20.1167, -17.7611, -16.9814, -16.9364, -16.9314, -16.9309, -16.9309, -16.9309, -16.9309, -16.9309]

        We illustrate fixing emissions::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.9,.1]], [(1,2),(-1,.5)], [.3,.7])
            sage: set_random_seed(0); v = m.sample(100)
            sage: m.baum_welch(v,fix_emissions=True)
            (-164.72944548204..., 23)
            sage: m.emission_parameters()
            [(1.0, 2.0), (-1.0, 0.5)]
            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.9,.1]], [(1,2),(-1,.5)], [.3,.7])
            sage: m.baum_welch(v)
            (-162.854370397998..., 49)
            sage: m.emission_parameters()
            [(1.27224191726, 2.37136875176), (-0.948617467518, 0.576236038512)]
        """
        if not isinstance(obs, TimeSeries):
            obs = TimeSeries(obs)
        cdef TimeSeries _obs = obs
        cdef TimeSeries alpha, beta, scale, gamma, xi
        cdef double log_probability, log_probability0, log_probability_prev, delta
        cdef int i, j, k, N, n_iterations
        cdef Py_ssize_t t, T
        cdef double denominator_A, numerator_A, denominator_B, numerator_mean, numerator_std

        # Initialization: one forward/backward pass to get the scaled
        # alpha/beta matrices and the gamma/xi posteriors.
        alpha, scale, log_probability0 = self._forward_scale_all(_obs)
        if not isfinite(log_probability0):
            return (0.0, 0)
        log_probability = log_probability0
        beta = self._backward_scale_all(_obs, scale)
        gamma = self._baum_welch_gamma(alpha, beta)
        xi = self._baum_welch_xi(alpha, beta, _obs)
        log_probability_prev = log_probability
        N = self.N
        n_iterations = 0
        T = len(_obs)

        # Re-estimation
        while True:
            # Reestimate initial probabilities: pi_i = gamma_0(i).
            for i in range(N):
                if not isfinite(gamma._values[0*N+i]):
                    # Before raising an error, leave self in a valid state.
                    util.normalize_probability_TimeSeries(self.pi, 0, self.pi._length)
                    raise RuntimeError, "impossible to compute gamma during reestimation"
                self.pi._values[i] = gamma._values[0*N+i]

            # Update the probabilities pi to define a valid discrete distribution
            util.normalize_probability_TimeSeries(self.pi, 0, self.pi._length)

            # Reestimate transition matrix and emission probabilities in
            # each state.
            for i in range(N):
                # Compute the updated transition matrix:
                # a_{ij} = (sum_t xi_t(i,j)) / (sum_t gamma_t(i)), t < T-1.
                denominator_A = 0.0
                for t in range(T-1):
                    denominator_A += gamma._values[t*N+i]
                if not isnormal(denominator_A):
                    raise RuntimeError, "unable to re-estimate transition matrix"
                for j in range(N):
                    numerator_A = 0.0
                    for t in range(T-1):
                        numerator_A += xi._values[t*N*N+i*N+j]
                    self.A._values[i*N+j] = numerator_A / denominator_A

                # Rescale the i-th row of the transition matrix to be
                # a valid stochastic matrix:
                util.normalize_probability_TimeSeries(self.A, i*N, (i+1)*N)

                if not fix_emissions:
                    # Emission denominator includes the final time step.
                    denominator_B = denominator_A + gamma._values[(T-1)*N + i]
                    if not isnormal(denominator_B):
                        raise RuntimeError, "unable to re-estimate emission probabilities"

                    numerator_mean = 0.0
                    numerator_std = 0.0
                    # Note: the std numerator deliberately uses the OLD
                    # mean self.B._values[2*i], updated only afterwards.
                    for t in range(T):
                        numerator_mean += gamma._values[t*N + i] * _obs._values[t]
                        numerator_std += gamma._values[t*N + i] * \
                            (_obs._values[t] - self.B._values[2*i])*(_obs._values[t] - self.B._values[2*i])
                    # restimated mean
                    self.B._values[2*i] = numerator_mean / denominator_B
                    # restimated standard deviation, bounded below by min_sd
                    self.B._values[2*i+1] = sqrt(numerator_std / denominator_B)
                    if self.B._values[2*i+1] < min_sd:
                        self.B._values[2*i+1] = min_sd
                    # Refresh the cached pdf coefficients for the new B.
                    self.probability_init()

            n_iterations += 1
            if n_iterations >= max_iter: break

            # Initialization for next iteration
            alpha, scale, log_probability0 = self._forward_scale_all(_obs)

            if not isfinite(log_probability0): break
            log_probability = log_probability0
            beta = self._backward_scale_all(_obs, scale)
            gamma = self._baum_welch_gamma(alpha, beta)
            xi = self._baum_welch_xi(alpha, beta, _obs)

            # Compute the difference between the log probability of
            # two iterations.
            delta = log_probability - log_probability_prev
            log_probability_prev = log_probability

            # If the log probability does not change by more than delta,
            # then terminate
            if delta >= 0 and delta <= log_likelihood_cutoff:
                break

        return log_probability, n_iterations
1005
cdef class GaussianMixtureHiddenMarkovModel(GaussianHiddenMarkovModel):
1006
"""
1007
GaussianMixtureHiddenMarkovModel(A, B, pi)
1008
1009
Gaussian mixture Hidden Markov Model.
1010
1011
INPUT:
1012
1013
- ``A`` -- matrix; the N x N transition matrix
1014
1015
- ``B`` -- list of mixture definitions for each state. Each
1016
state may have a varying number of gaussians with selection
1017
probabilities that sum to 1 and encoded as (p,(mu,sigma))
1018
1019
- ``pi`` -- initial state probabilities
1020
1021
- ``normalize`` --bool (default: True); if given, input is
1022
normalized to define valid probability distributions,
1023
e.g., the entries of A are made nonnegative and the rows
1024
sum to 1, and the probabilities in pi are normalized.
1025
1026
EXAMPLES::
1027
1028
sage: A = [[0.5,0.5],[0.5,0.5]]
1029
sage: B = [[(0.9,(0.0,1.0)), (0.1,(1,10000))],[(1,(1,1)), (0,(0,0.1))]]
1030
sage: hmm.GaussianMixtureHiddenMarkovModel(A, B, [1,0])
1031
Gaussian Mixture Hidden Markov Model with 2 States
1032
Transition matrix:
1033
[0.5 0.5]
1034
[0.5 0.5]
1035
Emission parameters:
1036
[0.9*N(0.0,1.0) + 0.1*N(1.0,10000.0), 1.0*N(1.0,1.0) + 0.0*N(0.0,0.1)]
1037
Initial probabilities: [1.0000, 0.0000]
1038
1039
TESTS:
1040
1041
If a standard deviation is 0, it is normalized to be slightly bigger than 0.::
1042
1043
sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(1,(0,0))]], [1])
1044
Gaussian Mixture Hidden Markov Model with 1 States
1045
Transition matrix:
1046
[1.0]
1047
Emission parameters:
1048
[1.0*N(0.0,1e-08)]
1049
Initial probabilities: [1.0000]
1050
1051
We test that number of emission distributions must be the same as the number of states::
1052
1053
sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [], [1])
1054
Traceback (most recent call last):
1055
...
1056
ValueError: number of GaussianMixtures must be the same as number of entries of pi
1057
1058
sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [[]], [1])
1059
Traceback (most recent call last):
1060
...
1061
ValueError: must specify at least one component of the mixture model
1062
1063
We test that the number of initial probabilities must equal the number of states::
1064
1065
sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [[]], [1,2])
1066
Traceback (most recent call last):
1067
...
1068
ValueError: number of entries of transition matrix A must be the square of the number of entries of pi
1069
"""
1070
1071
cdef object mixture # mixture
1072
1073
def __init__(self, A, B, pi=None, bint normalize=True):
    """
    Construct a Gaussian mixture hidden Markov model.

    EXAMPLES::

        sage: hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        Gaussian Mixture Hidden Markov Model with 2 States
        Transition matrix:
        [0.9 0.1]
        [0.4 0.6]
        Emission parameters:
        [0.4*N(0.0,1.0) + 0.6*N(1.0,0.1), 1.0*N(0.0,1.0)]
        Initial probabilities: [0.7000, 0.3000]
    """
    # Initial distribution first: its length defines the number of states N.
    self.pi = util.initial_probs_to_TimeSeries(pi, normalize)
    self.N = len(self.pi)
    self.A = util.state_matrix_to_TimeSeries(A, self.N, normalize)
    if len(self.A) != self.N*self.N:
        raise ValueError, "number of entries of transition matrix A must be the square of the number of entries of pi"

    # Coerce each entry of B into a GaussianMixtureDistribution; raw
    # (weight, (mean, std)) tuples are flattened into (weight, mean, std).
    mixtures = []
    for b in B:
        if isinstance(b, GaussianMixtureDistribution):
            mixtures.append(b)
        else:
            mixtures.append(GaussianMixtureDistribution([flatten(x) for x in b]))
    self.mixture = mixtures
    if self.N != len(self.mixture):
        raise ValueError, "number of GaussianMixtures must be the same as number of entries of pi"
1098
1099
def __repr__(self):
    """
    Return string representation of this Gaussian mixture HMM.

    EXAMPLES::

        sage: hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3]).__repr__()
        'Gaussian Mixture Hidden Markov Model with 2 States\nTransition matrix:\n[0.9 0.1]\n[0.4 0.6]\nEmission parameters:\n[0.4*N(0.0,1.0) + 0.6*N(1.0,0.1), 1.0*N(0.0,1.0)]\nInitial probabilities: [0.7000, 0.3000]'
    """
    # Assemble the four sections and join them; identical text to
    # building the string by repeated concatenation.
    parts = [
        "Gaussian Mixture Hidden Markov Model with %s States"%self.N,
        'Transition matrix:\n%s'%self.transition_matrix(),
        'Emission parameters:\n%s'%self.emission_parameters(),
        'Initial probabilities: %s'%self.initial_probabilities(),
    ]
    return '\n'.join(parts)
1113
1114
def __reduce__(self):
    """
    Used in pickling.

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(.4,(0,1)), (.6,(1,0.1))]], [1])
        sage: loads(dumps(m)) == m
        True
    """
    # The unpickler restores the model directly from these raw attributes.
    data = (self.A, self.B, self.pi, self.mixture)
    return unpickle_gaussian_mixture_hmm_v1, data
1126
1127
1128
def __cmp__(self, other):
    """
    Compare self and other, which must both be GaussianMixtureHiddenMarkovModel's.

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(.4,(0,1)), (.6,(1,0.1))]], [1])
        sage: n = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(.5,(0,1)), (.5,(1,0.1))]], [1])
        sage: m < n
        True
        sage: m == m
        True
        sage: n > m
        True
        sage: n < m
        False
    """
    if not isinstance(other, GaussianMixtureHiddenMarkovModel):
        raise ValueError
    # Compare the defining data (A, B, pi, mixture) lexicographically,
    # reusing the tuples that __reduce__ produces for pickling.
    self_data = self.__reduce__()[1]
    other_data = other.__reduce__()[1]
    return cmp(self_data, other_data)
1148
1149
def __getitem__(self, Py_ssize_t i):
    """
    Return the Gaussian mixture distribution associated to the
    i-th state.

    INPUT:

    - i -- integer

    OUTPUT:

    - a Gaussian mixture distribution object

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        sage: m[0]
        0.4*N(0.0,1.0) + 0.6*N(1.0,0.1)
        sage: m[1]
        1.0*N(0.0,1.0)

    Negative indexing works::

        sage: m[-1]
        1.0*N(0.0,1.0)

    Bounds are checked::

        sage: m[2]
        Traceback (most recent call last):
        ...
        IndexError: index out of range
        sage: m[-3]
        Traceback (most recent call last):
        ...
        IndexError: index out of range
    """
    # Work on a copy so the caller's index is never mutated; negative
    # indices wrap once, Python-style.
    cdef Py_ssize_t k = i
    if k < 0:
        k += self.N
    if not (0 <= k < self.N):
        raise IndexError('index out of range')
    return self.mixture[k]
1191
1192
def emission_parameters(self):
    """
    Return a list of all the emission distributions.

    OUTPUT:

    - list of Gaussian mixtures

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        sage: m.emission_parameters()
        [0.4*N(0.0,1.0) + 0.6*N(1.0,0.1), 1.0*N(0.0,1.0)]
    """
    # Return a fresh list so callers cannot mutate our internal state.
    return [dist for dist in self.mixture]
1207
1208
cdef double random_sample(self, int state, randstate rstate):
    """
    Return one random draw from the Gaussian mixture attached to the
    given state.

    Internal use only: ``state`` is not bounds-checked and no other
    error checking is done, so calling this improperly can lead to
    segfaults.

    INPUT:

    - state -- integer
    - rstate -- randstate instance

    OUTPUT:

    - double
    """
    cdef GaussianMixtureDistribution dist = self.mixture[state]
    return dist._sample(rstate)
1228
1229
cdef double probability_of(self, int state, double observation):
    """
    Return the emission density b_j(o) of the given observation o
    (=observation) in the given state j (=state).

    This is a continuous probability density, so the returned value p
    is such that the probability of an observation in the interval
    [o, o+d] is approximately p*d.

    INPUT:

    - state -- integer
    - observation -- double

    OUTPUT:

    - double
    """
    cdef GaussianMixtureDistribution dist = self.mixture[state]
    return dist.prob(observation)
1249
1250
cdef TimeSeries _baum_welch_mixed_gamma(self, TimeSeries alpha, TimeSeries beta,
                TimeSeries obs, int j):
    """
    Let gamma_t(j,m) be the m-component (in the mixture) of the
    probability of being in state j at time t, given the
    observation sequence. This function outputs a TimeSeries v
    such that v[m*T + t] gives gamma_t(j, m) where T is the number
    of time steps.

    INPUT:

    - alpha -- TimeSeries; scaled forward table, flattened as t*N + state
    - beta -- TimeSeries; scaled backward table, same t*N + state layout
    - obs -- TimeSeries
    - j -- int

    OUTPUT:

    - TimeSeries
    """
    cdef int i, k, m, N = self.N
    # alpha stores one value per (time, state) pair, so T = len(alpha)/N.
    cdef Py_ssize_t t, T = alpha._length//N

    cdef double numer, alpha_minus, P, s, prob
    cdef GaussianMixtureDistribution G = self.mixture[j]
    cdef int M = len(G)  # number of components in state j's mixture
    # Output is flattened as m*T + t (component-major).
    cdef TimeSeries mixed_gamma = TimeSeries(T*M)

    for t in range(T):
        # Total emission density of obs[t] in state j (sum over components).
        prob = self.probability_of(j, obs._values[t])
        if prob == 0:
            # If the probability of observing obs[t] in state j is 0, then
            # all of the m-mixture components *have* to automatically be 0,
            # since prob is the sum of those and they are all nonnegative.
            for m in range(M):
                mixed_gamma._values[m*T + t] = 0
        else:
            # Compute the denominator we used when scaling gamma.
            # The key thing is that this is consistent between
            # gamma and mixed_gamma.
            P = 0
            for k in range(N):
                P += alpha._values[t*N+k]*beta._values[t*N+k]

            # Divide out the total probability, so we can multiply back in
            # the m-components of the probability.
            alpha_minus = alpha._values[t*N + j] / prob
            for m in range(M):
                # Weight by the density of the single component m only.
                numer = alpha_minus * G.prob_m(obs._values[t], m) * beta._values[t*N + j]
                mixed_gamma._values[m*T + t] = numer / P

    return mixed_gamma
1302
1303
def baum_welch(self, obs, int max_iter=1000, double log_likelihood_cutoff=1e-12,
               double min_sd=0.01, bint fix_emissions=False):
    """
    Given an observation sequence obs, improve this HMM using the
    Baum-Welch algorithm to increase the probability of observing obs.

    INPUT:

    - obs -- a time series of emissions
    - max_iter -- integer (default: 1000) maximum number
      of Baum-Welch steps to take
    - log_likelihood_cutoff -- positive float (default: 1e-12);
      the minimal improvement in likelihood with respect to
      the last iteration required to continue. Relative value
      to log likelihood.
    - min_sd -- positive float (default: 0.01); when
      reestimating, the standard deviation of emissions is not
      allowed to be less than min_sd.
    - fix_emissions -- bool (default: False); if True, do not
      change emissions when updating

    OUTPUT:

    - changes the model in place, and returns the log
      likelihood and number of iterations.

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        sage: set_random_seed(0); v = m.sample(10); v
        [0.3576, -0.9365, 0.9449, -0.6957, 1.0217, 0.9644, 0.9987, -0.5950, -1.0219, 0.6477]
        sage: m.log_likelihood(v)
        -8.31408655939536...
        sage: m.baum_welch(v)
        (2.18905068682..., 15)
        sage: m.log_likelihood(v)
        2.18905068682...
        sage: m
        Gaussian Mixture Hidden Markov Model with 2 States
        Transition matrix:
        [ 0.874636333977 0.125363666023]
        [ 1.0 1.45168520229e-40]
        Emission parameters:
        [0.500161629343*N(-0.812298726239,0.173329026744) + 0.499838370657*N(0.982433690378,0.029719932009), 1.0*N(0.503260056832,0.145881515324)]
        Initial probabilities: [0.0000, 1.0000]

    We illustrate bounding the standard deviation below. Note that above we had
    different emission parameters when the min_sd was the default of 0.01::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        sage: m.baum_welch(v, min_sd=1)
        (-12.617885761692..., 1000)
        sage: m.emission_parameters()
        [0.503545634447*N(0.200166509595,1.0) + 0.496454365553*N(0.200166509595,1.0), 1.0*N(0.0543433426535,1.0)]

    We illustrate fixing all emissions::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        sage: set_random_seed(0); v = m.sample(10)
        sage: m.baum_welch(v, fix_emissions=True)
        (-7.58656858997..., 36)
        sage: m.emission_parameters()
        [0.4*N(0.0,1.0) + 0.6*N(1.0,0.1), 1.0*N(0.0,1.0)]
    """
    if not isinstance(obs, TimeSeries):
        obs = TimeSeries(obs)
    cdef TimeSeries _obs = obs
    cdef TimeSeries alpha, beta, scale, gamma, mixed_gamma, mixed_gamma_m, xi
    cdef double log_probability, log_probability0, log_probability_prev, delta
    cdef int i, j, k, m, N, n_iterations
    cdef Py_ssize_t t, T
    cdef double denominator_A, numerator_A, denominator_B, numerator_mean, numerator_std, \
        numerator_c, c, mu, std, numer, denom, new_mu, new_std, new_c, s
    cdef GaussianMixtureDistribution G

    # Initialization: compute the scaled forward/backward tables and
    # the E-step quantities gamma (state occupancy) and xi (transition
    # occupancy) for the current parameters.
    alpha, scale, log_probability0 = self._forward_scale_all(_obs)
    # A non-finite log likelihood means the model cannot be improved
    # from here; bail out without touching the parameters.
    if not isfinite(log_probability0):
        return (0.0, 0)
    log_probability = log_probability0
    beta = self._backward_scale_all(_obs, scale)
    gamma = self._baum_welch_gamma(alpha, beta)
    xi = self._baum_welch_xi(alpha, beta, _obs)
    log_probability_prev = log_probability
    N = self.N
    n_iterations = 0
    T = len(_obs)

    # Re-estimation (the M-step, repeated until convergence or max_iter)
    while True:

        # Reestimate frequency of state i in time t=0
        for i in range(N):
            if not isfinite(gamma._values[0*N+i]):
                # Before raising an error, leave self in a valid state.
                util.normalize_probability_TimeSeries(self.pi, 0, self.pi._length)
                raise RuntimeError, "impossible to compute gamma during reestimation"
            self.pi._values[i] = gamma._values[0*N+i]

        # Update the probabilities pi to define a valid discrete distribution
        util.normalize_probability_TimeSeries(self.pi, 0, self.pi._length)

        # Reestimate transition matrix and emission probabilities in
        # each state.
        for i in range(N):
            # Reestimate the state transition matrix.  Sums run over
            # t in [0, T-2] since xi counts transitions out of time t.
            denominator_A = 0.0
            for t in range(T-1):
                denominator_A += gamma._values[t*N+i]
            if not isnormal(denominator_A):
                raise RuntimeError, "unable to re-estimate pi (1)"
            for j in range(N):
                numerator_A = 0.0
                for t in range(T-1):
                    # xi is flattened as t*N*N + i*N + j.
                    numerator_A += xi._values[t*N*N+i*N+j]
                self.A._values[i*N+j] = numerator_A / denominator_A

            # Rescale the i-th row of the transition matrix to be
            # a valid stochastic matrix:
            util.normalize_probability_TimeSeries(self.A, i*N, (i+1)*N)

            ########################################################################
            # Re-estimate the emission probabilities
            ########################################################################
            G = self.mixture[i]
            if not fix_emissions and not G.is_fixed():
                # Per-component occupancy, flattened as m*T + t.
                mixed_gamma = self._baum_welch_mixed_gamma(alpha, beta, _obs, i)
                new_G = []
                for m in range(len(G)):
                    # Individually fixed components are carried over unchanged.
                    if G.fixed._values[m]:
                        new_G.append(G[m])
                        continue

                    # Compute re-estimated mu_{j,m}: occupancy-weighted
                    # mean of the observations.
                    numer = 0
                    denom = 0
                    for t in range(T):
                        numer += mixed_gamma._values[m*T + t] * _obs._values[t]
                        denom += mixed_gamma._values[m*T + t]
                    new_mu = numer / denom

                    # Compute re-estimated standard deviation, using the
                    # *old* component mean G[m][1] in the residuals.
                    numer = 0
                    mu = G[m][1]
                    for t in range(T):
                        numer += mixed_gamma._values[m*T + t] * \
                            (_obs._values[t] - mu)*(_obs._values[t] - mu)

                    new_std = sqrt(numer / denom)
                    # Clamp to min_sd so a component never degenerates
                    # to a point mass.
                    if new_std < min_sd:
                        new_std = min_sd

                    # Compute re-estimated weighting coefficient:
                    # component occupancy over total state occupancy.
                    new_c = denom
                    s = 0
                    for t in range(T):
                        s += gamma._values[t*N + i]
                    new_c /= s

                    new_G.append((new_c,new_mu,new_std))

                self.mixture[i] = GaussianMixtureDistribution(new_G)

        n_iterations += 1
        if n_iterations >= max_iter: break

        ########################################################################
        # Initialization for next iteration
        ########################################################################
        alpha, scale, log_probability0 = self._forward_scale_all(_obs)
        # Stop (keeping the last finite likelihood) if the update drove
        # the likelihood non-finite.
        if not isfinite(log_probability0): break
        log_probability = log_probability0
        beta = self._backward_scale_all(_obs, scale)
        gamma = self._baum_welch_gamma(alpha, beta)
        xi = self._baum_welch_xi(alpha, beta, _obs)

        # Compute the difference between the log probability of
        # two iterations.
        delta = log_probability - log_probability_prev
        log_probability_prev = log_probability

        # If the log probability does not improve by more than
        # delta, then terminate
        if delta >= 0 and delta <= log_likelihood_cutoff:
            break

    return log_probability, n_iterations
1490
1491
1492
##################################################
1493
# For Unpickling
1494
##################################################
1495
1496
# We keep the _v0 function for backwards compatibility.
1497
def unpickle_gaussian_hmm_v0(A, B, pi, name):
    """
    Reconstruct a GaussianHiddenMarkovModel from an old (v0) pickle.

    The ``name`` argument was stored by the old format and is ignored
    by the current model class.

    EXAMPLES::

        sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
        sage: sage.stats.hmm.chmm.unpickle_gaussian_hmm_v0(m.transition_matrix(), m.emission_parameters(), m.initial_probabilities(), 'test')
        Gaussian Hidden Markov Model with 1 States
        Transition matrix:
        [1.0]
        Emission parameters:
        [(0.0, 1.0)]
        Initial probabilities: [1.0000]
    """
    model = GaussianHiddenMarkovModel(A, B, pi)
    return model
1511
1512
1513
def unpickle_gaussian_hmm_v1(A, B, pi, prob, n_out):
    """
    Reconstruct a GaussianHiddenMarkovModel from its pickled attributes.

    EXAMPLES::

        sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
        sage: loads(dumps(m)) == m # indirect test
        True
    """
    # Allocate without running __init__, then restore the raw attributes
    # exactly as they were pickled.
    cdef GaussianHiddenMarkovModel m = PY_NEW(GaussianHiddenMarkovModel)
    m.pi = pi
    m.A = A
    m.B = B
    m.prob = prob
    m.n_out = n_out
    return m
1528
1529
def unpickle_gaussian_mixture_hmm_v1(A, B, pi, mixture):
    """
    Reconstruct a GaussianMixtureHiddenMarkovModel from its pickled attributes.

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(.4,(0,1)), (.6,(1,0.1))]], [1])
        sage: loads(dumps(m)) == m # indirect test
        True
    """
    # Allocate without running __init__, then restore the raw attributes
    # exactly as they were pickled.
    cdef GaussianMixtureHiddenMarkovModel m = PY_NEW(GaussianMixtureHiddenMarkovModel)
    m.pi = pi
    m.A = A
    m.B = B
    m.mixture = mixture
    return m
1543
1544
1545