Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sagesmc
Path: blob/master/src/sage/stats/hmm/hmm.pyx
8818 views
1
"""
2
Hidden Markov Models
3
4
This is a complete pure-Cython optimized implementation of Hidden
5
Markov Models. It fully supports Discrete, Gaussian, and Mixed
6
Gaussian emissions.
7
8
The best references for the basic HMM algorithms implemented here are:
9
10
- Tapas Kanungo's "Hidden Markov Models"
11
12
- Jackson's HMM tutorial:
13
http://personal.ee.surrey.ac.uk/Personal/P.Jackson/tutorial/
14
15
LICENSE: Some of the code in this file is based on reading Kanungo's
16
GPLv2+ implementation of discrete HMM's, hence the present code must
17
be licensed with a GPLv2+ compatible license.
18
19
AUTHOR:
20
21
- William Stein, 2010-03
22
"""
23
24
#############################################################################
25
# Copyright (C) 2010 William Stein <[email protected]>
26
# Distributed under the terms of the GNU General Public License (GPL) v2+.
27
# The full text of the GPL is available at:
28
# http://www.gnu.org/licenses/
29
#############################################################################
30
31
include "sage/ext/stdsage.pxi"
32
include "sage/ext/cdefs.pxi"
33
include "sage/ext/interrupt.pxi"
34
35
cdef extern from "math.h":
36
double log(double)
37
38
from sage.finance.time_series cimport TimeSeries
39
from sage.matrix.matrix import is_Matrix
40
from sage.matrix.all import matrix
41
from sage.misc.randstate cimport current_randstate, randstate
42
43
from util cimport HMM_Util
44
45
cdef HMM_Util util = HMM_Util()
46
47
###########################################
48
49
cdef class HiddenMarkovModel:
    """
    Abstract base class for all Hidden Markov Models.

    Subclasses are expected to provide the attributes ``pi`` (initial
    probabilities), ``A`` (transition matrix, stored row-major as a
    TimeSeries), ``N`` (number of states), and a ``generate_sequence``
    method.
    """
    def initial_probabilities(self):
        """
        Return the initial probabilities as a TimeSeries of length N,
        where N is the number of states of the Markov model.

        EXAMPLES::

            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
            sage: pi = m.initial_probabilities(); pi
            [0.2000, 0.8000]
            sage: type(pi)
            <type 'sage.finance.time_series.TimeSeries'>

        The returned time series is a copy, so changing it does not
        change the model::

            sage: pi[0] = .1; pi[1] = .9
            sage: m.initial_probabilities()
            [0.2000, 0.8000]

        Some other models::

            sage: hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,1), (-1,1)], [.1,.9]).initial_probabilities()
            [0.1000, 0.9000]
            sage: hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3]).initial_probabilities()
            [0.7000, 0.3000]
        """
        # Constructing a new TimeSeries makes a copy, so callers cannot
        # mutate the model through the returned object.
        return TimeSeries(self.pi)

    def transition_matrix(self):
        """
        Return the state transition matrix.

        OUTPUT:

        - a Sage matrix with real double precision (RDF) entries.

        EXAMPLES::

            sage: M = hmm.DiscreteHiddenMarkovModel([[0.7,0.3],[0.9,0.1]], [[0.5,.5],[.1,.9]], [0.3,0.7])
            sage: T = M.transition_matrix(); T
            [0.7 0.3]
            [0.9 0.1]

        The returned matrix is mutable, but changing it does not
        change the transition matrix for the model::

            sage: T[0,0] = .1; T[0,1] = .9
            sage: M.transition_matrix()
            [0.7 0.3]
            [0.9 0.1]

        Transition matrices for other types of models::

            sage: hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,1), (-1,1)], [.5,.5]).transition_matrix()
            [0.1 0.9]
            [0.5 0.5]
            sage: hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3]).transition_matrix()
            [0.9 0.1]
            [0.4 0.6]
        """
        from sage.matrix.constructor import matrix
        from sage.rings.all import RDF
        # self.A stores the N x N matrix row-major in a flat TimeSeries.
        return matrix(RDF, self.N, self.A.list())

    def graph(self, eps=1e-3):
        """
        Create a weighted directed graph from the transition matrix,
        not including any edge with a probability less than eps.

        INPUT:

        - eps -- nonnegative real number

        OUTPUT:

        - a digraph

        EXAMPLES::

            sage: m = hmm.DiscreteHiddenMarkovModel([[.3,0,.7],[0,0,1],[.5,.5,0]], [[.5,.5,.2]]*3, [1/3]*3)
            sage: G = m.graph(); G
            Looped digraph on 3 vertices
            sage: G.edges()
            [(0, 0, 0.3), (0, 2, 0.7), (1, 2, 1.0), (2, 0, 0.5), (2, 1, 0.5)]
            sage: G.plot()
        """
        cdef int i, j
        # Work on a copy of the transition matrix (transition_matrix()
        # returns a fresh matrix) so the model is not modified.
        m = self.transition_matrix()
        for i in range(self.N):
            for j in range(self.N):
                # Zero out negligible transitions so DiGraph omits the edge.
                if m[i,j] < eps:
                    m[i,j] = 0
        from sage.graphs.all import DiGraph
        return DiGraph(m, weighted=True)

    def sample(self, Py_ssize_t length, number=None, starting_state=None):
        """
        Return number samples from this HMM of given length.

        INPUT:

        - ``length`` -- positive integer
        - ``number`` -- (default: None) if given, compute list of this many sample sequences
        - ``starting_state`` -- int (or None); if specified then generate
          a sequence using this model starting with the given state
          instead of the initial probabilities to determine the
          starting state.

        OUTPUT:

        - if number is not given, return a single TimeSeries.
        - if number is given, return a list of TimeSeries.

        EXAMPLES::

            sage: set_random_seed(0)
            sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1])
            sage: print a.sample(10, 3)
            [[1, 0, 1, 1, 1, 1, 0, 1, 1, 1], [1, 1, 0, 0, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 0, 1, 0, 1, 1, 1]]
            sage: a.sample(15)
            [1, 1, 1, 1, 0 ... 1, 1, 1, 1, 1]
            sage: a.sample(3, 1)
            [[1, 1, 1]]
            sage: list(a.sample(1000)).count(0)
            88

        If the emission symbols are set::

            sage: set_random_seed(0)
            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down'])
            sage: a.sample(10)
            ['down', 'up', 'down', 'down', 'down', 'down', 'up', 'up', 'up', 'up']

        Force a starting state::

            sage: set_random_seed(0); a.sample(10, starting_state=0)
            ['up', 'up', 'down', 'down', 'down', 'down', 'up', 'up', 'up', 'up']
        """
        # generate_sequence returns (observations, states); we only keep
        # the observation sequence here.
        if number is None:
            return self.generate_sequence(length, starting_state=starting_state)[0]

        cdef Py_ssize_t i
        return [self.generate_sequence(length, starting_state=starting_state)[0] for i in range(number)]

    #########################################################
    # Some internal functions used for various general
    # HMM algorithms.
    #########################################################
    cdef TimeSeries _baum_welch_gamma(self, TimeSeries alpha, TimeSeries beta):
        """
        Used internally to compute the scaled quantity gamma_t(j)
        appearing in the Baum-Welch reestimation algorithm.

        The quantity gamma_t(j) is the (scaled!) probability of being
        in state j at time t, given the observation sequence.

        INPUT:

        - ``alpha`` -- TimeSeries as output by the scaled forward algorithm
        - ``beta`` -- TimeSeries as output by the scaled backward algorithm

        OUTPUT:

        - TimeSeries gamma such that gamma[t*N+j] is gamma_t(j).
        """
        cdef int j, N = self.N
        # alpha is stored flat as T blocks of N entries, so T = len/N.
        cdef Py_ssize_t t, T = alpha._length//N
        cdef TimeSeries gamma = TimeSeries(alpha._length, initialize=False)
        cdef double denominator
        # sig_on/sig_off make the (potentially long) C loop interruptible
        # by Ctrl-C from Sage.
        sig_on()
        for t in range(T):
            # gamma_t(j) ~ alpha_t(j) * beta_t(j), normalized over j so
            # each time slice sums to 1.
            denominator = 0
            for j in range(N):
                gamma._values[t*N + j] = alpha._values[t*N + j] * beta._values[t*N + j]
                denominator += gamma._values[t*N + j]
            for j in range(N):
                gamma._values[t*N + j] /= denominator
        sig_off()
        return gamma
234
235
236
cdef class DiscreteHiddenMarkovModel(HiddenMarkovModel):
237
"""
238
A discrete Hidden Markov model implemented using double precision
239
floating point arithmetic.
240
241
INPUT:
242
243
- ``A`` -- a list of lists or a square N x N matrix, whose
244
(i,j) entry gives the probability of transitioning from
245
state i to state j.
246
247
- ``B`` -- a list of N lists or a matrix with N rows, such that
248
B[i,k] gives the probability of emitting symbol k while
249
in state i.
250
251
- ``pi`` -- the probabilities of starting in each initial
252
state, i.e., pi[i] is the probability of starting in
253
state i.
254
255
- ``emission_symbols`` -- None or list (default: None); if
256
None, the emission_symbols are the ints [0..N-1], where N
257
is the number of states. Otherwise, they are the entries
258
of the list emissions_symbols, which must all be hashable.
259
260
- ``normalize`` -- bool (default: True); if True, input is
261
normalized to define valid probability distributions,
262
e.g., the entries of A are made nonnegative and the rows
263
sum to 1, and the probabilities in pi are normalized.
264
265
EXAMPLES::
266
267
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.5,.5]); m
268
Discrete Hidden Markov Model with 2 States and 2 Emissions
269
Transition matrix:
270
[0.4 0.6]
271
[0.1 0.9]
272
Emission matrix:
273
[0.1 0.9]
274
[0.5 0.5]
275
Initial probabilities: [0.5000, 0.5000]
276
sage: m.log_likelihood([0,1,0,1,0,1])
277
-4.66693474691329...
278
sage: m.viterbi([0,1,0,1,0,1])
279
([1, 1, 1, 1, 1, 1], -5.378832842208748)
280
sage: m.baum_welch([0,1,0,1,0,1])
281
(0.0, 22)
282
sage: m
283
Discrete Hidden Markov Model with 2 States and 2 Emissions
284
Transition matrix:
285
[1.0134345614...e-70 1.0]
286
[ 1.0 3.997435271...e-19]
287
Emission matrix:
288
[7.3802215662...e-54 1.0]
289
[ 1.0 3.99743526...e-19]
290
Initial probabilities: [0.0000, 1.0000]
291
sage: m.sample(10)
292
[0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
293
sage: m.graph().plot()
294
295
A 3-state model that happens to always output 'b'::
296
297
sage: m = hmm.DiscreteHiddenMarkovModel([[1/3]*3]*3, [[0,1,0]]*3, [1/3]*3, ['a','b','c'])
298
sage: m.sample(10)
299
['b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b']
300
"""
301
cdef TimeSeries B
302
cdef int n_out
303
cdef object _emission_symbols, _emission_symbols_dict
304
305
    def __init__(self, A, B, pi, emission_symbols=None, bint normalize=True):
        """
        Create a discrete emissions HMM with transition probability
        matrix A, emission probabilities given by B, initial state
        probabilities pi, and given emission symbols (which default
        to the first few nonnegative integers).

        EXAMPLES::

            sage: hmm.DiscreteHiddenMarkovModel([.5,0,-1,.5], [[1],[1]],[.5,.5]).transition_matrix()
            [1.0 0.0]
            [0.0 1.0]
            sage: hmm.DiscreteHiddenMarkovModel([0,0,.1,.9], [[1],[1]],[.5,.5]).transition_matrix()
            [0.5 0.5]
            [0.1 0.9]
            sage: hmm.DiscreteHiddenMarkovModel([-1,-2,.1,.9], [[1],[1]],[.5,.5]).transition_matrix()
            [0.5 0.5]
            [0.1 0.9]
            sage: hmm.DiscreteHiddenMarkovModel([1,2,.1,1.2], [[1],[1]],[.5,.5]).transition_matrix()
            [ 0.333333333333 0.666666666667]
            [0.0769230769231 0.923076923077]
        """
        # Normalize/convert pi and A via the shared HMM_Util helper; the
        # number of states N is inferred from the length of pi.
        self.pi = util.initial_probs_to_TimeSeries(pi, normalize)
        self.N = len(self.pi)
        self.A = util.state_matrix_to_TimeSeries(A, self.N, normalize)
        self._emission_symbols = emission_symbols
        if self._emission_symbols is not None:
            # Reverse map: symbol -> integer code, for fast encoding of
            # observation sequences.
            self._emission_symbols_dict = dict([(y,x) for x,y in enumerate(emission_symbols)])

        if not is_Matrix(B):
            B = matrix(B)
        if B.nrows() != self.N:
            raise ValueError, "number of rows of B must equal number of states"
        # B is stored flattened row-major as a TimeSeries.
        self.B = TimeSeries(B.list())
        self.n_out = B.ncols()
        if emission_symbols is not None and len(emission_symbols) != self.n_out:
            raise ValueError, "number of emission symbols must equal number of output states"
        cdef Py_ssize_t i
        if normalize:
            # Normalize each row of B in place so it is a probability
            # distribution over the n_out symbols.
            for i in range(self.N):
                util.normalize_probability_TimeSeries(self.B, i*self.n_out, (i+1)*self.n_out)
346
347
    def __reduce__(self):
        """
        Used in pickling.

        Returns the module-level unpickling function together with the
        tuple of raw model data it needs to reconstruct this model.

        EXAMPLES::

            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [0,1], ['a','b'])
            sage: loads(dumps(m)) == m
            True
        """
        # unpickle_discrete_hmm_v1 is defined elsewhere in this module.
        return unpickle_discrete_hmm_v1, \
               (self.A, self.B, self.pi, self.n_out, self._emission_symbols, self._emission_symbols_dict)
359
360
    def __cmp__(self, other):
        """
        Compare two discrete hidden Markov models.

        Models are compared by comparing their defining data (the tuple
        used for pickling), so two models are equal exactly when all of
        their parameters agree.

        EXAMPLES::

            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[0.5,0.5]], [.5,.5])
            sage: n = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[0.5,0.5]], [1,0])
            sage: m == n
            False
            sage: m == m
            True
            sage: m < n
            True
            sage: n < m
            False
        """
        if not isinstance(other, DiscreteHiddenMarkovModel):
            raise ValueError
        # Compare the raw data tuples from __reduce__ (index 1 is the
        # argument tuple; index 0 is the unpickle function).
        return cmp(self.__reduce__()[1], other.__reduce__()[1])
378
379
380
def emission_matrix(self):
381
"""
382
Return the matrix whose i-th row specifies the emission
383
probability distribution for the i-th state. More precisely,
384
the i,j entry of the matrix is the probability of the Markov
385
model outputing the j-th symbol when it is in the i-th state.
386
387
OUTPUT:
388
389
- a Sage matrix with real double precision (RDF) entries.
390
391
EXAMPLES::
392
393
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.5,.5])
394
sage: E = m.emission_matrix(); E
395
[0.1 0.9]
396
[0.5 0.5]
397
398
The returned matrix is mutable, but changing it does not
399
change the transition matrix for the model::
400
401
sage: E[0,0] = 0; E[0,1] = 1
402
sage: m.emission_matrix()
403
[0.1 0.9]
404
[0.5 0.5]
405
"""
406
from sage.matrix.constructor import matrix
407
from sage.rings.all import RDF
408
return matrix(RDF, self.N, self.n_out, self.B.list())
409
410
411
def __repr__(self):
412
"""
413
Return string representation of this discrete hidden Markov model.
414
415
EXAMPLES::
416
417
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
418
sage: m.__repr__()
419
'Discrete Hidden Markov Model with 2 States and 2 Emissions\nTransition matrix:\n[0.4 0.6]\n[0.1 0.9]\nEmission matrix:\n[0.1 0.9]\n[0.5 0.5]\nInitial probabilities: [0.2000, 0.8000]'
420
"""
421
s = "Discrete Hidden Markov Model with %s States and %s Emissions"%(
422
self.N, self.n_out)
423
s += '\nTransition matrix:\n%s'%self.transition_matrix()
424
s += '\nEmission matrix:\n%s'%self.emission_matrix()
425
s += '\nInitial probabilities: %s'%self.initial_probabilities()
426
if self._emission_symbols is not None:
427
s += '\nEmission symbols: %s'%self._emission_symbols
428
return s
429
430
def _emission_symbols_to_IntList(self, obs):
431
"""
432
Internal function used to convert a list of emission symbols to an IntList.
433
434
INPUT:
435
436
- obs -- a list of objects
437
438
OUTPUT:
439
440
- an IntList
441
442
EXAMPLES::
443
444
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8], ['smile', 'frown'])
445
sage: m._emission_symbols_to_IntList(['frown','smile'])
446
[1, 0]
447
"""
448
d = self._emission_symbols_dict
449
return IntList([d[x] for x in obs])
450
451
    def _IntList_to_emission_symbols(self, obs):
        """
        Internal function used to convert an IntList of integer codes
        into the corresponding list of emission symbols.  This is the
        inverse of :meth:`_emission_symbols_to_IntList`.

        INPUT:

        - obs -- an IntList of integer symbol codes

        OUTPUT:

        - a list of emission symbols

        EXAMPLES::

            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8], ['smile', 'frown'])
            sage: m._IntList_to_emission_symbols(stats.IntList([0,0,1,0]))
            ['smile', 'smile', 'frown', 'smile']
        """
        # _emission_symbols maps integer code -> symbol by position.
        d = self._emission_symbols
        return [d[x] for x in obs]
471
472
    def log_likelihood(self, obs, bint scale=True):
        """
        Return the logarithm of the probability that this model produced the given
        observation sequence. Thus the output is a non-positive number.

        INPUT:

        - ``obs`` -- sequence of observations

        - ``scale`` -- boolean (default: True); if True, use rescaling
          to avoid loss of precision due to the very limited
          dynamic range of floats. You should leave this as True
          unless the obs sequence is very small.

        EXAMPLES::

            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
            sage: m.log_likelihood([0, 1, 0, 1, 1, 0, 1, 0, 0, 0])
            -7.3301308009370825
            sage: m.log_likelihood([0, 1, 0, 1, 1, 0, 1, 0, 0, 0], scale=False)
            -7.330130800937082
            sage: m.log_likelihood([])
            0.0

            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8], ['happy','sad'])
            sage: m.log_likelihood(['happy','happy'])
            -1.6565295199679506
            sage: m.log_likelihood(['happy','sad'])
            -1.4731602941415523

        Overflow from not using the scale option::

            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
            sage: m.log_likelihood([0,1]*1000, scale=True)
            -1433.820666652728
            sage: m.log_likelihood([0,1]*1000, scale=False)
            -inf
        """
        # Empty sequence: probability 1, so log-likelihood 0.
        if len(obs) == 0:
            return 0.0
        # Translate user-level symbols into integer codes if needed.
        if self._emission_symbols is not None:
            obs = self._emission_symbols_to_IntList(obs)
        if not isinstance(obs, IntList):
            obs = IntList(obs)
        # Dispatch to the scaled or unscaled forward algorithm.
        if scale:
            return self._forward_scale(obs)
        else:
            return self._forward(obs)
520
521
def _forward(self, IntList obs):
522
"""
523
Memory-efficient implementation of the forward algorithm, without scaling.
524
525
INPUT:
526
527
- ``obs`` -- an integer list of observation states.
528
529
OUTPUT:
530
531
- ``float`` -- the log of the probability that the model produced this sequence
532
533
EXAMPLES::
534
535
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
536
sage: m._forward(stats.IntList([0,1]*10))
537
-14.378663512219456
538
539
The forward algorithm computes the log likelihood::
540
541
sage: m.log_likelihood(stats.IntList([0,1]*10), scale=False)
542
-14.378663512219456
543
544
But numerical overflow will happen (without scaling) for long sequences::
545
546
sage: m._forward(stats.IntList([0,1]*1000))
547
-inf
548
"""
549
if obs.max() > self.N or obs.min() < 0:
550
raise ValueError, "invalid observation sequence, since it includes unknown states"
551
552
cdef Py_ssize_t i, j, t, T = len(obs)
553
554
cdef TimeSeries alpha = TimeSeries(self.N), \
555
alpha2 = TimeSeries(self.N)
556
557
# Initialization
558
for i in range(self.N):
559
alpha[i] = self.pi[i] * self.B[i*self.n_out + obs._values[0]]
560
alpha[i] = self.pi[i] * self.B[i*self.n_out + obs._values[0]]
561
562
# Induction
563
cdef double s
564
for t in range(1, T):
565
for j in range(self.N):
566
s = 0
567
for i in range(self.N):
568
s += alpha._values[i] * self.A._values[i*self.N + j]
569
alpha2._values[j] = s * self.B._values[j*self.n_out+obs._values[t]]
570
for j in range(self.N):
571
alpha._values[j] = alpha2._values[j]
572
573
# Termination
574
return log(alpha.sum())
575
576
    def _forward_scale(self, IntList obs):
        """
        Memory-efficient implementation of the forward algorithm, with scaling.

        INPUT:

        - ``obs`` -- an integer list of observation states.

        OUTPUT:

        - ``float`` -- the log of the probability that the model produced this sequence

        EXAMPLES::

            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
            sage: m._forward_scale(stats.IntList([0,1]*10))
            -14.378663512219454

        The forward algorithm computes the log likelihood::

            sage: m.log_likelihood(stats.IntList([0,1]*10))
            -14.378663512219454

        Note that the scale algorithm ensures that numerical overflow
        won't happen for long sequences like it does with the forward
        non-scaled algorithm::

            sage: m._forward_scale(stats.IntList([0,1]*1000))
            -1433.820666652728
            sage: m._forward(stats.IntList([0,1]*1000))
            -inf

        A random sequence produced by the model is more likely::

            sage: set_random_seed(0); v = m.sample(1000)
            sage: m._forward_scale(v)
            -686.8753189365056
        """
        # This is just like self._forward(obs) above, except at every step of the
        # algorithm, we rescale the vector alpha so that the sum of
        # the entries in alpha is 1.  Then, at the end of the
        # algorithm, the sum of probabilities (alpha.sum()) is of
        # course just 1.  However, the true probability that we want
        # is the product of the numbers that we divided by when
        # rescaling, since at each step of the iteration the next term
        # depends linearly on the previous one.  Instead of returning
        # the product, we return the sum of the logs, which avoids
        # numerical error.
        cdef Py_ssize_t i, j, t, T = len(obs)

        # The running sum of the log probabilities
        cdef double log_probability = 0, sum, a

        cdef TimeSeries alpha = TimeSeries(self.N), \
             alpha2 = TimeSeries(self.N)

        # Initialization: alpha_1(i) = pi_i * b_i(obs[0]), then rescale.
        sum = 0
        for i in range(self.N):
            a = self.pi[i] * self.B[i*self.n_out + obs._values[0]]
            alpha[i] = a
            sum += a

        log_probability = log(sum)
        alpha.rescale(1/sum)

        # Induction
        # The code below is just an optimized version of:
        #     alpha = (alpha * A).pairwise_product(B[O[t+1]])
        #     alpha = alpha.scale(1/alpha.sum())
        # along with keeping track of the log of the scaling factor.
        cdef double s
        for t in range(1, T):
            sum = 0
            for j in range(self.N):
                s = 0
                for i in range(self.N):
                    s += alpha._values[i] * self.A._values[i*self.N + j]
                a = s * self.B._values[j*self.n_out+obs._values[t]]
                alpha2._values[j] = a
                sum += a

            # Accumulate the log of this step's scaling factor, then
            # rescale alpha in place for the next iteration.
            log_probability += log(sum)
            for j in range(self.N):
                alpha._values[j] = alpha2._values[j] / sum

        # Termination
        return log_probability
664
665
def generate_sequence(self, Py_ssize_t length, starting_state=None):
666
"""
667
Return a sample of the given length from this HMM.
668
669
INPUT:
670
671
- ``length`` -- positive integer
672
- ``starting_state`` -- int (or None); if specified then generate
673
a sequence using this model starting with the given state
674
instead of the initial probabilities to determine the
675
starting state.
676
677
678
OUTPUT:
679
680
- an IntList or list of emission symbols
681
- IntList of the actual states the model was in when
682
emitting the corresponding symbols
683
684
EXAMPLES:
685
686
In this example, the emission symbols are not set::
687
688
sage: set_random_seed(0)
689
sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1])
690
sage: a.generate_sequence(5)
691
([1, 0, 1, 1, 1], [1, 0, 1, 1, 1])
692
sage: list(a.generate_sequence(1000)[0]).count(0)
693
90
694
695
Here the emission symbols are set::
696
697
sage: set_random_seed(0)
698
sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down'])
699
sage: a.generate_sequence(5)
700
(['down', 'up', 'down', 'down', 'down'], [1, 0, 1, 1, 1])
701
702
Specify the starting state::
703
704
sage: set_random_seed(0); a.generate_sequence(5, starting_state=0)
705
(['up', 'up', 'down', 'down', 'down'], [0, 0, 1, 1, 1])
706
"""
707
if length < 0:
708
raise ValueError, "length must be nonnegative"
709
710
# Create Integer lists for states and observations
711
cdef IntList states = IntList(length)
712
cdef IntList obs = IntList(length)
713
if length == 0:
714
# A special case
715
if self._emission_symbols is None:
716
return states, obs
717
else:
718
return states, []
719
720
# Setup variables, including random state.
721
cdef Py_ssize_t i, j
722
cdef randstate rstate = current_randstate()
723
cdef int q = 0
724
cdef double r, accum
725
r = rstate.c_rand_double()
726
727
# Now choose random variables from our discrete distribution.
728
729
# This standard naive algorithm has complexity that is linear
730
# in the number of states. It might be possible to replace it
731
# by something that is more efficient. However, make sure to
732
# refactor this code into distribution.pyx before doing so.
733
# Note that state switching involves the same algorithm
734
# (below). Use GSL as described here to make this O(1):
735
# http://www.gnu.org/software/gsl/manual/html_node/General-Discrete-Distributions.html
736
737
# Choose initial state:
738
if starting_state is None:
739
accum = 0
740
for i in range(self.N):
741
if r < self.pi._values[i] + accum:
742
q = i
743
else:
744
accum += self.pi._values[i]
745
else:
746
q = starting_state
747
if q < 0 or q >= self.N:
748
raise ValueError, "starting state must be between 0 and %s"%(self.N-1)
749
750
states._values[0] = q
751
# Generate a symbol from state q
752
obs._values[0] = self._gen_symbol(q, rstate.c_rand_double())
753
754
cdef double* row
755
cdef int O
756
sig_on()
757
for i in range(1, length):
758
# Choose next state
759
accum = 0
760
row = self.A._values + q*self.N
761
r = rstate.c_rand_double()
762
for j in range(self.N):
763
if r < row[j] + accum:
764
q = j
765
break
766
else:
767
accum += row[j]
768
states._values[i] = q
769
# Generate symbol from this new state q
770
obs._values[i] = self._gen_symbol(q, rstate.c_rand_double())
771
sig_off()
772
773
if self._emission_symbols is None:
774
# No emission symbol mapping
775
return obs, states
776
else:
777
# Emission symbol mapping, so change our intlist into a list of symbols
778
return self._IntList_to_emission_symbols(obs), states
779
780
    cdef int _gen_symbol(self, int q, double r):
        """
        Generate a symbol in state q using the randomly chosen
        floating point number r, which should be between 0 and 1.

        INPUT:

        - ``q`` -- a nonnegative integer, which specifies a state
        - ``r`` -- a real number between 0 and 1

        OUTPUT:

        - a nonnegative int

        EXAMPLES::

            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.9,0.1]], [.2,.8])
            sage: set_random_seed(0)
            sage: m.generate_sequence(10) # indirect test
            ([0, 1, 0, 0, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 1, 1, 0, 0, 0, 0])
        """
        cdef Py_ssize_t j
        cdef double a, accum = 0
        # Inverse-CDF sampling over row q of the emission matrix B.
        # See the comments above about switching to GSL for this; also note
        # that this should get factored out into a call to something
        # defined in distributions.pyx.
        for j in range(self.n_out):
            a = self.B._values[q*self.n_out + j]
            if r < a + accum:
                return j
            else:
                accum += a
        # None of the values was selected: shouldn't this only happen if the
        # distribution is invalid?  Anyway, this will get factored out.
        # (Can also be reached through floating-point rounding when the row
        # sums to just under 1.)
        return self.n_out - 1
815
816
def viterbi(self, obs, log_scale=True):
817
"""
818
Determine "the" hidden sequence of states that is most likely
819
to produce the given sequence seq of observations, along with
820
the probability that this hidden sequence actually produced
821
the observation.
822
823
INPUT:
824
825
- ``seq`` -- sequence of emitted ints or symbols
826
827
- ``log_scale`` -- bool (default: True) whether to scale the
828
sequence in order to avoid numerical overflow.
829
830
OUTPUT:
831
832
- ``list`` -- "the" most probable sequence of hidden states, i.e.,
833
the Viterbi path.
834
835
- ``float`` -- log of probability that the observed sequence
836
was produced by the Viterbi sequence of states.
837
838
EXAMPLES::
839
840
sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[0.9,0.1],[0.1,0.9]], [0.5,0.5])
841
sage: a.viterbi([1,0,0,1,0,0,1,1])
842
([1, 0, 0, 1, ..., 0, 1, 1], -11.06245322477221...)
843
844
We predict the state sequence when the emissions are 3/4 and 'abc'.::
845
846
sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[0.9,0.1],[0.1,0.9]], [0.5,0.5], [3/4, 'abc'])
847
848
Note that state 0 is common below, despite the model trying hard to
849
switch to state 1::
850
851
sage: a.viterbi([3/4, 'abc', 'abc'] + [3/4]*10)
852
([0, 1, 1, 0, 0 ... 0, 0, 0, 0, 0], -25.299405845367794)
853
"""
854
if self._emission_symbols is not None:
855
obs = self._emission_symbols_to_IntList(obs)
856
elif not isinstance(obs, IntList):
857
obs = IntList(obs)
858
if log_scale:
859
return self._viterbi_scale(obs)
860
else:
861
return self._viterbi(obs)
862
863
    cpdef _viterbi(self, IntList obs):
        """
        Used internally to compute the viterbi path, without
        rescaling.  This can be useful for short sequences.

        INPUT:

        - ``obs`` -- IntList

        OUTPUT:

        - IntList (most likely state sequence)

        - log of probability that the observed sequence was
          produced by the Viterbi sequence of states.

        EXAMPLES::

            sage: m = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.9,0.1]], [[1,0],[0,1]], [.2,.8])
            sage: m._viterbi(stats.IntList([1]*5))
            ([1, 1, 1, 1, 1], -9.433483923290392)
            sage: m._viterbi(stats.IntList([0]*5))
            ([0, 0, 0, 0, 0], -10.819778284410283)

        Long sequences will overflow::

            sage: m._viterbi(stats.IntList([0]*1000))
            ([0, 0, 0, 0, 0 ... 0, 0, 0, 0, 0], -inf)
        """
        cdef Py_ssize_t t, T = obs._length
        cdef IntList state_sequence = IntList(T)
        if T == 0:
            # Empty observation sequence: empty path with probability 1.
            return state_sequence, 0.0

        cdef int i, j, N = self.N

        # delta[i] is the maximum of the probabilities over all
        # paths ending in state i.
        cdef TimeSeries delta = TimeSeries(N), delta_prev = TimeSeries(N)

        # We view psi as an N x T matrix of ints.  The quantity
        #     psi[N*t + j]
        # is a most probable hidden state at time t-1, given
        # that j is a most probable state at time t.
        cdef IntList psi = IntList(N * T)  # initialized to 0 by default

        # Initialization:
        for i in range(N):
            delta._values[i] = self.pi._values[i] * self.B._values[self.n_out*i + obs._values[0]]

        # Recursion:
        cdef double mx, tmp
        cdef int index
        for t in range(1, T):
            # Reuse the two buffers by swapping instead of reallocating.
            delta_prev, delta = delta, delta_prev
            for j in range(N):
                # delta_t(j) = max_i(delta_{t-1}(i) a_{i,j}) * b_j(obs[t])
                mx = -1
                index = -1
                for i in range(N):
                    tmp = delta_prev._values[i]*self.A._values[i*N+j]
                    if tmp > mx:
                        mx = tmp
                        index = i
                delta._values[j] = mx * self.B._values[self.n_out*j + obs._values[t]]
                psi._values[N*t + j] = index

        # Termination: best final state and its path probability.
        mx, index = delta.max(index=True)

        # Path (state sequence) backtracking:
        state_sequence._values[T-1] = index
        t = T-2
        while t >= 0:
            state_sequence._values[t] = psi._values[N*(t+1) + state_sequence._values[t+1]]
            t -= 1

        return state_sequence, log(mx)
941
942
943
    cpdef _viterbi_scale(self, IntList obs):
        """
        Used internally to compute the viterbi path with rescaling.

        INPUT:

        - obs -- IntList

        OUTPUT:

        - IntList (most likely state sequence)

        - log of probability that the observed sequence was
          produced by the Viterbi sequence of states.

        EXAMPLES::

            sage: m = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.9,0.1]], [[.5,.5],[0,1]], [.2,.8])
            sage: m._viterbi_scale(stats.IntList([1]*10))
            ([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], -4.637124095034373)

        Long sequences should not overflow::

            sage: m._viterbi_scale(stats.IntList([1]*1000))
            ([1, 0, 1, 0, 1 ... 0, 1, 0, 1, 0], -452.05188897345...)
        """
        # The algorithm is the same as _viterbi above, except
        # we take the logarithms of everything first, and add
        # instead of multiply.
        cdef Py_ssize_t t, T = obs._length
        cdef IntList state_sequence = IntList(T)
        if T == 0:
            # Empty observation sequence: empty path with probability 1.
            return state_sequence, 0.0
        cdef int i, j, N = self.N

        # delta[i] is the maximum of the probabilities over all
        # paths ending in state i.
        cdef TimeSeries delta = TimeSeries(N), delta_prev = TimeSeries(N)

        # We view psi as an N x T matrix of ints.  The quantity
        #     psi[N*t + j]
        # is a most probable hidden state at time t-1, given
        # that j is a most probable state at time t.
        cdef IntList psi = IntList(N * T)  # initialized to 0 by default

        # Log Preprocessing: work entirely with log probabilities so
        # long sequences do not underflow to 0.
        cdef TimeSeries A = self.A.log()
        cdef TimeSeries B = self.B.log()
        cdef TimeSeries pi = self.pi.log()

        # Initialization:
        for i in range(N):
            delta._values[i] = pi._values[i] + B._values[self.n_out*i + obs._values[0]]

        # Recursion:
        cdef double mx, tmp, minus_inf = float('-inf')
        cdef int index

        for t in range(1, T):
            # Reuse the two buffers by swapping instead of reallocating.
            delta_prev, delta = delta, delta_prev
            for j in range(N):
                # delta_t(j) = max_i(delta_{t-1}(i) + log a_{i,j}) + log b_j(obs[t])
                mx = minus_inf
                index = -1
                for i in range(N):
                    tmp = delta_prev._values[i] + A._values[i*N+j]
                    if tmp > mx:
                        mx = tmp
                        index = i
                delta._values[j] = mx + B._values[self.n_out*j + obs._values[t]]
                psi._values[N*t + j] = index

        # Termination: best final state; mx is already a log probability.
        mx, index = delta.max(index=True)

        # Path (state sequence) backtracking:
        state_sequence._values[T-1] = index
        t = T-2
        while t >= 0:
            state_sequence._values[t] = psi._values[N*(t+1) + state_sequence._values[t+1]]
            t -= 1

        return state_sequence, mx
1026
1027
cdef TimeSeries _backward_scale_all(self, IntList obs, TimeSeries scale):
    r"""
    Return the scaled matrix of values `\beta_t(i)` that appear in
    the backtracking algorithm.  This function is used internally
    by the Baum-Welch algorithm.

    The matrix is stored as a TimeSeries T, such that
    `\beta_t(i) = T[t*N + i]` where N is the number of states of
    the Hidden Markov Model.

    The quantity beta_t(i) is the probability of observing the
    sequence obs[t+1:] assuming that the model is in state i at
    time t.

    INPUT:

    - ``obs`` -- IntList
    - ``scale`` -- series that is *changed* in place, so that
      after calling this function, scale[t] is value that is
      used to scale each of the `\beta_t(i)`.

    OUTPUT:

    - a TimeSeries of values beta_t(i).
    - the input object scale is modified
    """
    cdef Py_ssize_t t, T = obs._length
    cdef int N = self.N, i, j
    cdef double s
    cdef TimeSeries beta = TimeSeries(N*T, initialize=False)

    # 1. Initialization: beta_{T-1}(i) = 1 for all i, divided by the
    #    same scaling factor the forward pass used at the last step.
    for i in range(N):
        beta._values[(T-1)*N + i] = 1/scale._values[T-1]

    # 2. Induction, working backwards from t = T-2 down to 0:
    #    beta_t(i) = sum_j a_{i,j} * b_j(obs[t+1]) * beta_{t+1}(j),
    #    rescaled by scale[t] to match the scaled forward variables.
    t = T-2
    while t >= 0:
        for i in range(N):
            s = 0
            for j in range(N):
                s += self.A._values[i*N+j] * \
                     self.B._values[j*self.n_out+obs._values[t+1]] * beta._values[(t+1)*N+j]
            beta._values[t*N + i] = s/scale._values[t]
        t -= 1
    return beta
cdef _forward_scale_all(self, IntList obs):
    """
    Return scaled values alpha_t(i), the sequence of scalings, and
    the log probability.

    INPUT:

    - ``obs`` -- IntList

    OUTPUT:

    - TimeSeries alpha with alpha_t(i) = alpha[t*N + i]
    - TimeSeries scale with scale[t] the scaling at step t
    - float -- log_probability of the observation sequence
      being produced by the model.
    """
    cdef Py_ssize_t i, j, t, T = len(obs)
    cdef int N = self.N

    # The running sum of the log probabilities
    cdef double log_probability = 0, sum, a

    cdef TimeSeries alpha = TimeSeries(N*T, initialize=False)
    cdef TimeSeries scale = TimeSeries(T, initialize=False)

    # Initialization: alpha_0(i) = pi_i * b_i(obs[0]), then normalize.
    sum = 0
    for i in range(self.N):
        a = self.pi._values[i] * self.B._values[i*self.n_out + obs._values[0]]
        alpha._values[0*N + i] = a
        sum += a

    scale._values[0] = sum
    # NOTE(review): if sum is 0 (no state can emit obs[0]) this is
    # log(0) = -inf and the division below is by zero -- presumably
    # callers rely on the model assigning positive probability; confirm.
    log_probability = log(sum)
    for i in range(self.N):
        alpha._values[0*N + i] /= sum

    # Induction
    # The code below is just an optimized version of:
    #     alpha = (alpha * A).pairwise_product(B[O[t+1]])
    #     alpha = alpha.scale(1/alpha.sum())
    # along with keeping track of the log of the scaling factor.
    cdef double s
    for t in range(1,T):
        sum = 0
        for j in range(N):
            s = 0
            for i in range(N):
                s += alpha._values[(t-1)*N + i] * self.A._values[i*N + j]
            a = s * self.B._values[j*self.n_out + obs._values[t]]
            alpha._values[t*N + j] = a
            sum += a

        # Accumulate log of the scaling factor; the product of the
        # scale[t] values recovers the unscaled probability.
        log_probability += log(sum)
        scale._values[t] = sum
        for j in range(self.N):
            alpha._values[t*N + j] /= sum

    # Termination
    return alpha, scale, log_probability
cdef TimeSeries _baum_welch_xi(self, TimeSeries alpha, TimeSeries beta, IntList obs):
    """
    Used internally to compute the scaled quantity xi_t(i,j)
    appearing in the Baum-Welch reestimation algorithm.

    INPUT:

    - ``alpha`` -- TimeSeries as output by the scaled forward algorithm
    - ``beta`` -- TimeSeries as output by the scaled backward algorithm
    - ``obs`` -- IntList of observations

    OUTPUT:

    - TimeSeries xi such that xi[t*N*N + i*N + j] = xi_t(i,j).
    """
    cdef int i, j, N = self.N
    cdef double sum
    # T is recovered from alpha, which stores N entries per time step.
    cdef Py_ssize_t t, T = alpha._length//N
    cdef TimeSeries xi = TimeSeries(T*N*N, initialize=False)
    # Allow ctrl-c to interrupt this potentially long T*N*N loop.
    sig_on()
    # Only t = 0 .. T-2 are filled: xi_t involves the transition from
    # time t to t+1.  The t = T-1 slab stays uninitialized, and
    # baum_welch only sums xi over t in range(T-1), so it is never read.
    for t in range(T-1):
        sum = 0.0
        for i in range(N):
            for j in range(N):
                # xi_t(i,j) ~ alpha_t(i) * a_{i,j} * b_j(obs[t+1]) * beta_{t+1}(j)
                xi._values[t*N*N + i*N + j] = alpha._values[t*N + i]*beta._values[(t+1)*N + j]*\
                    self.A._values[i*N + j] * self.B._values[j*self.n_out + obs._values[t+1]]
                sum += xi._values[t*N*N + i*N + j]
        # Normalize so that the xi_t(i,j) sum to 1 over (i,j).
        for i in range(N):
            for j in range(N):
                xi._values[t*N*N + i*N + j] /= sum
    sig_off()
    return xi
def baum_welch(self, obs, int max_iter=100, double log_likelihood_cutoff=1e-4, bint fix_emissions=False):
    """
    Given an observation sequence obs, improve this HMM using the
    Baum-Welch algorithm to increase the probability of observing obs.

    INPUT:

    - ``obs`` -- list of emissions

    - ``max_iter`` -- integer (default: 100) maximum number
      of Baum-Welch steps to take

    - ``log_likelihood_cutoff`` -- positive float (default: 1e-4);
      the minimal improvement in likelihood with respect to
      the last iteration required to continue. Relative value
      to log likelihood.

    - ``fix_emissions`` -- bool (default: False); if True, do not
      change emissions when updating

    OUTPUT:

    - changes the model in place, and returns the log
      likelihood and number of iterations.

    EXAMPLES::

        sage: m = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.9,0.1]], [[.5,.5],[0,1]], [.2,.8])
        sage: m.baum_welch([1,0]*20, log_likelihood_cutoff=0)
        (0.0, 4)
        sage: m
        Discrete Hidden Markov Model with 2 States and 2 Emissions
        Transition matrix:
        [1.35152697077e-51 1.0]
        [ 1.0 0.0]
        Emission matrix:
        [ 1.0 6.46253713885e-52]
        [ 0.0 1.0]
        Initial probabilities: [0.0000, 1.0000]

    The following illustrates how Baum-Welch is only a local
    optimizer, i.e., the above model is far more likely to produce
    the sequence [1,0]*20 than the one we get below::

        sage: m = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[.5,.5],[.5,.5]], [.5,.5])
        sage: m.baum_welch([1,0]*20, log_likelihood_cutoff=0)
        (-27.725887222397784, 1)
        sage: m
        Discrete Hidden Markov Model with 2 States and 2 Emissions
        Transition matrix:
        [0.5 0.5]
        [0.5 0.5]
        Emission matrix:
        [0.5 0.5]
        [0.5 0.5]
        Initial probabilities: [0.5000, 0.5000]

    We illustrate fixing emissions::

        sage: m = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.9,0.1]], [[.5,.5],[.2,.8]], [.2,.8])
        sage: set_random_seed(0); v = m.sample(100)
        sage: m.baum_welch(v,fix_emissions=True)
        (-66.98630856918774, 100)
        sage: m.emission_matrix()
        [0.5 0.5]
        [0.2 0.8]
        sage: m = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.9,0.1]], [[.5,.5],[.2,.8]], [.2,.8])
        sage: m.baum_welch(v)
        (-66.7823606592935..., 100)
        sage: m.emission_matrix()
        [0.530308574863 0.469691425137]
        [0.290977555017 0.709022444983]
    """
    # Convert the observations to an IntList of emission indices.
    if self._emission_symbols is not None:
        obs = self._emission_symbols_to_IntList(obs)
    elif not isinstance(obs, IntList):
        obs = IntList(obs)
    cdef IntList _obs = obs
    cdef TimeSeries alpha, beta, scale, gamma, xi
    cdef double log_probability, log_probability_prev, delta
    cdef int i, j, k, N, n_iterations
    cdef Py_ssize_t t, T
    cdef double denominator_A, numerator_A, denominator_B, numerator_B

    # Initialization: run the scaled forward/backward passes once to
    # get the expected-count statistics for the first re-estimation.
    alpha, scale, log_probability = self._forward_scale_all(_obs)
    beta = self._backward_scale_all(_obs, scale)
    gamma = self._baum_welch_gamma(alpha, beta)
    xi = self._baum_welch_xi(alpha, beta, _obs)
    log_probability_prev = log_probability
    N = self.N
    n_iterations = 0
    T = len(_obs)

    # Re-estimation
    while True:

        # Reestimate frequency of state i in time t=0
        for i in range(N):
            self.pi._values[i] = gamma._values[0*N+i]

        # Reestimate transition matrix and emissions probability in
        # each state.
        for i in range(N):
            # denominator_A = expected number of transitions out of
            # state i (gamma summed over t = 0 .. T-2).
            denominator_A = 0.0
            for t in range(T-1):
                denominator_A += gamma._values[t*N+i]
            for j in range(N):
                # numerator_A = expected number of i -> j transitions.
                numerator_A = 0.0
                for t in range(T-1):
                    numerator_A += xi._values[t*N*N+i*N+j]
                self.A._values[i*N+j] = numerator_A / denominator_A

            if not fix_emissions:
                # For emissions the last time step also counts, so add
                # gamma_{T-1}(i) to the denominator.
                denominator_B = denominator_A + gamma._values[(T-1)*N + i]
                for k in range(self.n_out):
                    # Expected number of times state i emits symbol k.
                    numerator_B = 0.0
                    for t in range(T):
                        if _obs._values[t] == k:
                            numerator_B += gamma._values[t*N + i]
                    self.B._values[i*self.n_out + k] = numerator_B / denominator_B

        # Recompute the statistics for the updated model.
        alpha, scale, log_probability = self._forward_scale_all(_obs)
        beta = self._backward_scale_all(_obs, scale)
        gamma = self._baum_welch_gamma(alpha, beta)
        xi = self._baum_welch_xi(alpha, beta, _obs)

        # Compute the difference between the log probability of
        # two iterations.
        delta = log_probability - log_probability_prev
        log_probability_prev = log_probability
        n_iterations += 1

        # Terminate when the improvement falls below the cutoff or
        # the iteration budget is exhausted.
        if delta <= log_likelihood_cutoff or n_iterations >= max_iter:
            break

    return log_probability, n_iterations
# Keep this -- it's for backwards compatibility with the GHMM based implementation
1311
def unpickle_discrete_hmm_v0(A, B, pi, emission_symbols, name):
    """
    Reconstruct a model pickled by the old GHMM-based implementation.

    The ``name`` argument is accepted purely for pickle compatibility;
    it is not used when building the new model.

    TESTS::

        sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[0.5,0.5]], [1,0])
        sage: sage.stats.hmm.hmm.unpickle_discrete_hmm_v0(m.transition_matrix(), m.emission_matrix(), m.initial_probabilities(), ['a','b'], 'test model')
        Discrete Hidden Markov Model with 2 States and 2 Emissions...
    """
    # The pickled data was already normalized, so skip re-normalization.
    model = DiscreteHiddenMarkovModel(A, B, pi, emission_symbols, normalize=False)
    return model
1321
def unpickle_discrete_hmm_v1(A, B, pi, n_out, emission_symbols, emission_symbols_dict):
    """
    Reconstruct a DiscreteHiddenMarkovModel from its pickled attributes.

    TESTS::

        sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[0.5,0.5]], [1,0],['a','b'])
        sage: loads(dumps(m)) == m # indirect test
        True
    """
    # Allocate without calling __init__, since every attribute is
    # restored directly from the pickle below.
    cdef DiscreteHiddenMarkovModel model = PY_NEW(DiscreteHiddenMarkovModel)
    model.pi = pi
    model.A = A
    model.B = B
    model.n_out = n_out
    model._emission_symbols_dict = emission_symbols_dict
    model._emission_symbols = emission_symbols
    return model
1338
1339
1340