"""
Hidden Markov Models
This is a complete pure-Cython optimized implementation of Hidden
Markov Models. It fully supports discrete, Gaussian, and Gaussian
mixture emissions.
The best references for the basic HMM algorithms implemented here are:
- Tapas Kanungo's "Hidden Markov Models"
- Jackson's HMM tutorial:
http://personal.ee.surrey.ac.uk/Personal/P.Jackson/tutorial/
LICENSE: Some of the code in this file is based on reading Kanungo's
GPLv2+ implementation of discrete HMMs, hence the present code must
be licensed with a GPLv2+ compatible license.
AUTHOR:
- William Stein, 2010-03
"""
include "../../ext/stdsage.pxi"
include "../../ext/cdefs.pxi"
include "../../ext/interrupt.pxi"
cdef extern from "math.h":
double log(double)
from sage.finance.time_series cimport TimeSeries
# IntList is used throughout for integer observation and state sequences
from sage.stats.intlist cimport IntList
from sage.matrix.all import is_Matrix, matrix
from sage.misc.randstate cimport current_randstate, randstate
from util cimport HMM_Util
cdef HMM_Util util = HMM_Util()
cdef class HiddenMarkovModel:
"""
Abstract base class for all Hidden Markov Models.
"""
def initial_probabilities(self):
"""
Return the initial probabilities as a TimeSeries of length N,
where N is the number of states of the Markov model.
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
sage: pi = m.initial_probabilities(); pi
[0.2000, 0.8000]
sage: type(pi)
<type 'sage.finance.time_series.TimeSeries'>
The returned time series is a copy, so changing it does not
change the model::
sage: pi[0] = .1; pi[1] = .9
sage: m.initial_probabilities()
[0.2000, 0.8000]
Some other models::
sage: hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,1), (-1,1)], [.1,.9]).initial_probabilities()
[0.1000, 0.9000]
sage: hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3]).initial_probabilities()
[0.7000, 0.3000]
"""
return TimeSeries(self.pi)
def transition_matrix(self):
"""
Return the state transition matrix.
OUTPUT:
- a Sage matrix with real double precision (RDF) entries.
EXAMPLES::
sage: M = hmm.DiscreteHiddenMarkovModel([[0.7,0.3],[0.9,0.1]], [[0.5,.5],[.1,.9]], [0.3,0.7])
sage: T = M.transition_matrix(); T
[0.7 0.3]
[0.9 0.1]
The returned matrix is mutable, but changing it does not
change the transition matrix for the model::
sage: T[0,0] = .1; T[0,1] = .9
sage: M.transition_matrix()
[0.7 0.3]
[0.9 0.1]
Transition matrices for other types of models::
sage: hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,1), (-1,1)], [.5,.5]).transition_matrix()
[0.1 0.9]
[0.5 0.5]
sage: hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3]).transition_matrix()
[0.9 0.1]
[0.4 0.6]
"""
from sage.matrix.constructor import matrix
from sage.rings.all import RDF
return matrix(RDF, self.N, self.A.list())
def graph(self, eps=1e-3):
"""
Create a weighted directed graph from the transition matrix,
not including any edge with a probability less than eps.
INPUT:
- eps -- nonnegative real number
OUTPUT:
- a digraph
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[.3,0,.7],[0,0,1],[.5,.5,0]], [[.5,.5,.2]]*3, [1/3]*3)
sage: G = m.graph(); G
Looped multi-digraph on 3 vertices
sage: G.edges()
[(0, 0, 0.3), (0, 2, 0.7), (1, 2, 1.0), (2, 0, 0.5), (2, 1, 0.5)]
sage: G.plot()
"""
cdef int i, j
m = self.transition_matrix()
for i in range(self.N):
for j in range(self.N):
if m[i,j] < eps:
m[i,j] = 0
from sage.graphs.all import DiGraph
return DiGraph(m, weighted=True)
def sample(self, Py_ssize_t length, number=None, starting_state=None):
"""
Return ``number`` samples from this HMM, each of the given length.
INPUT:
- ``length`` -- nonnegative integer
- ``number`` -- (default: None) if given, compute a list of this many sample sequences
- ``starting_state`` -- int (or None); if specified, generate
a sequence using this model starting from the given state,
instead of drawing the starting state from the initial
probabilities.
OUTPUT:
- if number is not given, return a single TimeSeries.
- if number is given, return a list of TimeSeries.
EXAMPLES::
sage: set_random_seed(0)
sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1])
sage: print a.sample(10, 3)
[[1, 0, 1, 1, 1, 1, 0, 1, 1, 1], [1, 1, 0, 0, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 0, 1, 0, 1, 1, 1]]
sage: a.sample(15)
[1, 1, 1, 1, 0 ... 1, 1, 1, 1, 1]
sage: a.sample(3, 1)
[[1, 1, 1]]
sage: list(a.sample(1000)).count(0)
88
If the emission symbols are set::
sage: set_random_seed(0)
sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down'])
sage: a.sample(10)
['down', 'up', 'down', 'down', 'down', 'down', 'up', 'up', 'up', 'up']
Force a starting state::
sage: set_random_seed(0); a.sample(10, starting_state=0)
['up', 'up', 'down', 'down', 'down', 'down', 'up', 'up', 'up', 'up']
"""
if number is None:
return self.generate_sequence(length, starting_state=starting_state)[0]
cdef Py_ssize_t i
return [self.generate_sequence(length, starting_state=starting_state)[0] for i in range(number)]
cdef TimeSeries _baum_welch_gamma(self, TimeSeries alpha, TimeSeries beta):
"""
Used internally to compute the scaled quantity gamma_t(j)
appearing in the Baum-Welch reestimation algorithm.
The quantity gamma_t(j) is the (scaled!) probability of being
in state j at time t, given the observation sequence.
INPUT:
- ``alpha`` -- TimeSeries as output by the scaled forward algorithm
- ``beta`` -- TimeSeries as output by the scaled backward algorithm
OUTPUT:
- TimeSeries gamma such that gamma[t*N+j] is gamma_t(j).
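In formulas, gamma_t(j) = alpha_t(j)*beta_t(j) / (sum over k of
alpha_t(k)*beta_t(k)). Computing this from the scaled alpha and
beta gives the same value, since the scaling factors are
independent of j and cancel in the quotient.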
"""
cdef int j, N = self.N
cdef Py_ssize_t t, T = alpha._length//N
cdef TimeSeries gamma = TimeSeries(alpha._length, initialize=False)
cdef double denominator
sig_on()
for t in range(T):
denominator = 0
for j in range(N):
gamma._values[t*N + j] = alpha._values[t*N + j] * beta._values[t*N + j]
denominator += gamma._values[t*N + j]
for j in range(N):
gamma._values[t*N + j] /= denominator
sig_off()
return gamma
cdef class DiscreteHiddenMarkovModel(HiddenMarkovModel):
"""
A discrete Hidden Markov model implemented using double precision
floating point arithmetic.
INPUT:
- ``A`` -- a list of lists or a square N x N matrix, whose
(i,j) entry gives the probability of transitioning from
state i to state j.
- ``B`` -- a list of N lists or a matrix with N rows, such that
B[i,k] gives the probability of emitting symbol k while
in state i.
- ``pi`` -- the probabilities of starting in each initial
state, i.e., pi[i] is the probability of starting in
state i.
- ``emission_symbols`` -- None or list (default: None); if
None, the emission symbols are the ints [0..n-1], where n
is the number of columns of B. Otherwise, they are the
entries of the list emission_symbols, which must all be hashable.
- ``normalize`` -- bool (default: True); if True, the input is
normalized to define valid probability distributions,
e.g., the entries of A are made nonnegative and each row
sums to 1, and the probabilities in pi are normalized.
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.5,.5]); m
Discrete Hidden Markov Model with 2 States and 2 Emissions
Transition matrix:
[0.4 0.6]
[0.1 0.9]
Emission matrix:
[0.1 0.9]
[0.5 0.5]
Initial probabilities: [0.5000, 0.5000]
sage: m.log_likelihood([0,1,0,1,0,1])
-4.66693474691329...
sage: m.viterbi([0,1,0,1,0,1])
([1, 1, 1, 1, 1, 1], -5.378832842208748)
sage: m.baum_welch([0,1,0,1,0,1])
(0.0, 22)
sage: m
Discrete Hidden Markov Model with 2 States and 2 Emissions
Transition matrix:
[1.0134345614...e-70 1.0]
[ 1.0 3.997435271...e-19]
Emission matrix:
[7.3802215662...e-54 1.0]
[ 1.0 3.99743526...e-19]
Initial probabilities: [0.0000, 1.0000]
sage: m.sample(10)
[0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
sage: m.graph().plot()
A 3-state model that always outputs 'b'::
sage: m = hmm.DiscreteHiddenMarkovModel([[1/3]*3]*3, [[0,1,0]]*3, [1/3]*3, ['a','b','c'])
sage: m.sample(10)
['b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b']
"""
cdef TimeSeries B
cdef int n_out
cdef object _emission_symbols, _emission_symbols_dict
def __init__(self, A, B, pi, emission_symbols=None, bint normalize=True):
"""
Create a discrete emissions HMM with transition probability
matrix A, emission probabilities given by B, initial state
probabilities pi, and given emission symbols (which default
to the first few nonnegative integers).
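Normalization clamps negative entries to 0 and rescales each row
to sum to 1; a row that sums to 0 is replaced by the uniform
distribution, as the examples below illustrate.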
EXAMPLES::
sage: hmm.DiscreteHiddenMarkovModel([.5,0,-1,.5], [[1],[1]],[.5,.5]).transition_matrix()
[1.0 0.0]
[0.0 1.0]
sage: hmm.DiscreteHiddenMarkovModel([0,0,.1,.9], [[1],[1]],[.5,.5]).transition_matrix()
[0.5 0.5]
[0.1 0.9]
sage: hmm.DiscreteHiddenMarkovModel([-1,-2,.1,.9], [[1],[1]],[.5,.5]).transition_matrix()
[0.5 0.5]
[0.1 0.9]
sage: hmm.DiscreteHiddenMarkovModel([1,2,.1,1.2], [[1],[1]],[.5,.5]).transition_matrix()
[ 0.333333333333 0.666666666667]
[0.0769230769231 0.923076923077]
"""
self.pi = util.initial_probs_to_TimeSeries(pi, normalize)
self.N = len(self.pi)
self.A = util.state_matrix_to_TimeSeries(A, self.N, normalize)
self._emission_symbols = emission_symbols
if self._emission_symbols is not None:
self._emission_symbols_dict = dict([(y,x) for x,y in enumerate(emission_symbols)])
if not is_Matrix(B):
B = matrix(B)
if B.nrows() != self.N:
raise ValueError, "number of rows of B must equal number of states"
self.B = TimeSeries(B.list())
self.n_out = B.ncols()
if emission_symbols is not None and len(emission_symbols) != self.n_out:
raise ValueError, "number of emission symbols must equal number of output states"
cdef Py_ssize_t i
if normalize:
for i in range(self.N):
util.normalize_probability_TimeSeries(self.B, i*self.n_out, (i+1)*self.n_out)
def __reduce__(self):
"""
Used in pickling.
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [0,1], ['a','b'])
sage: loads(dumps(m)) == m
True
"""
return unpickle_discrete_hmm_v1, \
(self.A, self.B, self.pi, self.n_out, self._emission_symbols, self._emission_symbols_dict)
def __cmp__(self, other):
"""
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[0.5,0.5]], [.5,.5])
sage: n = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[0.5,0.5]], [1,0])
sage: m == n
False
sage: m == m
True
sage: m < n
True
sage: n < m
False
"""
if not isinstance(other, DiscreteHiddenMarkovModel):
raise ValueError
return cmp(self.__reduce__()[1], other.__reduce__()[1])
def emission_matrix(self):
"""
Return the matrix whose i-th row specifies the emission
probability distribution for the i-th state. More precisely,
the i,j entry of the matrix is the probability of the Markov
model outputting the j-th symbol when it is in the i-th state.
OUTPUT:
- a Sage matrix with real double precision (RDF) entries.
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.5,.5])
sage: E = m.emission_matrix(); E
[0.1 0.9]
[0.5 0.5]
The returned matrix is mutable, but changing it does not
change the emission matrix for the model::
sage: E[0,0] = 0; E[0,1] = 1
sage: m.emission_matrix()
[0.1 0.9]
[0.5 0.5]
"""
from sage.matrix.constructor import matrix
from sage.rings.all import RDF
return matrix(RDF, self.N, self.n_out, self.B.list())
def __repr__(self):
"""
Return string representation of this discrete hidden Markov model.
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
sage: m.__repr__()
'Discrete Hidden Markov Model with 2 States and 2 Emissions\nTransition matrix:\n[0.4 0.6]\n[0.1 0.9]\nEmission matrix:\n[0.1 0.9]\n[0.5 0.5]\nInitial probabilities: [0.2000, 0.8000]'
"""
s = "Discrete Hidden Markov Model with %s States and %s Emissions"%(
self.N, self.n_out)
s += '\nTransition matrix:\n%s'%self.transition_matrix()
s += '\nEmission matrix:\n%s'%self.emission_matrix()
s += '\nInitial probabilities: %s'%self.initial_probabilities()
if self._emission_symbols is not None:
s += '\nEmission symbols: %s'%self._emission_symbols
return s
def _emission_symbols_to_IntList(self, obs):
"""
Internal function used to convert a list of emission symbols to an IntList.
INPUT:
- obs -- a list of objects
OUTPUT:
- an IntList
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8], ['smile', 'frown'])
sage: m._emission_symbols_to_IntList(['frown','smile'])
[1, 0]
"""
d = self._emission_symbols_dict
return IntList([d[x] for x in obs])
def _IntList_to_emission_symbols(self, obs):
"""
Internal function used to convert an IntList of ints into the
corresponding list of emission symbols.
INPUT:
- obs -- an IntList
OUTPUT:
- a list of emission symbols
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8], ['smile', 'frown'])
sage: m._IntList_to_emission_symbols(stats.IntList([0,0,1,0]))
['smile', 'smile', 'frown', 'smile']
"""
d = self._emission_symbols
return [d[x] for x in obs]
def log_likelihood(self, obs, bint scale=True):
"""
Return the logarithm of the probability that this model produced the given
observation sequence. Thus the output is a non-positive number.
INPUT:
- ``obs`` -- sequence of observations
- ``scale`` -- boolean (default: True); if True, use rescaling
to avoid loss of precision due to the very limited
dynamic range of floats. You should leave this as True
unless the obs sequence is very short.
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
sage: m.log_likelihood([0, 1, 0, 1, 1, 0, 1, 0, 0, 0])
-7.3301308009370825
sage: m.log_likelihood([0, 1, 0, 1, 1, 0, 1, 0, 0, 0], scale=False)
-7.330130800937082
sage: m.log_likelihood([])
0.0
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8], ['happy','sad'])
sage: m.log_likelihood(['happy','happy'])
-1.6565295199679506
sage: m.log_likelihood(['happy','sad'])
-1.4731602941415523
Without the scale option, the probability underflows to 0 for
long sequences, so the log is -inf::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
sage: m.log_likelihood([0,1]*1000, scale=True)
-1433.820666652728
sage: m.log_likelihood([0,1]*1000, scale=False)
-inf
"""
if len(obs) == 0:
return 0.0
if self._emission_symbols is not None:
obs = self._emission_symbols_to_IntList(obs)
if not isinstance(obs, IntList):
obs = IntList(obs)
if scale:
return self._forward_scale(obs)
else:
return self._forward(obs)
def _forward(self, IntList obs):
"""
Memory-efficient implementation of the forward algorithm, without scaling.
INPUT:
- ``obs`` -- an integer list of observation states.
OUTPUT:
- ``float`` -- the log of the probability that the model produced this sequence
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
sage: m._forward(stats.IntList([0,1]*10))
-14.378663512219456
The forward algorithm computes the log likelihood::
sage: m.log_likelihood(stats.IntList([0,1]*10), scale=False)
-14.378663512219456
But numerical underflow will happen (without scaling) for long sequences::
sage: m._forward(stats.IntList([0,1]*1000))
-inf
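The recursion implemented below is the standard unscaled forward
algorithm. Here is a minimal pure-Python sketch of the same
recursion (an illustration only: ``A`` and ``B`` are assumed to be
lists of rows, ``pi`` a list of floats, and ``forward`` is a
hypothetical name)::

    from math import log

    def forward(A, B, pi, obs):
        # alpha[j] = P(obs[:t+1] and the chain is in state j at time t)
        N = len(pi)
        alpha = [pi[i] * B[i][obs[0]] for i in range(N)]
        for y in obs[1:]:
            alpha = [sum(alpha[i] * A[i][j] for i in range(N)) * B[j][y]
                     for j in range(N)]
        return log(sum(alpha))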
"""
if obs.max() >= self.n_out or obs.min() < 0:
raise ValueError, "invalid observation sequence, since it includes unknown symbols"
cdef Py_ssize_t i, j, t, T = len(obs)
cdef TimeSeries alpha = TimeSeries(self.N), \
alpha2 = TimeSeries(self.N)
for i in range(self.N):
alpha[i] = self.pi[i] * self.B[i*self.n_out + obs._values[0]]
cdef double s
for t in range(1, T):
for j in range(self.N):
s = 0
for i in range(self.N):
s += alpha._values[i] * self.A._values[i*self.N + j]
alpha2._values[j] = s * self.B._values[j*self.n_out+obs._values[t]]
for j in range(self.N):
alpha._values[j] = alpha2._values[j]
return log(alpha.sum())
def _forward_scale(self, IntList obs):
"""
Memory-efficient implementation of the forward algorithm, with scaling.
INPUT:
- ``obs`` -- an integer list of observation states.
OUTPUT:
- ``float`` -- the log of the probability that the model produced this sequence
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.5,0.5]], [.2,.8])
sage: m._forward_scale(stats.IntList([0,1]*10))
-14.378663512219454
The forward algorithm computes the log likelihood::
sage: m.log_likelihood(stats.IntList([0,1]*10))
-14.378663512219454
Note that the scaled algorithm ensures that numerical underflow
won't happen for long sequences, unlike the non-scaled forward
algorithm::
sage: m._forward_scale(stats.IntList([0,1]*1000))
-1433.820666652728
sage: m._forward(stats.IntList([0,1]*1000))
-inf
A random sequence produced by the model is more likely::
sage: set_random_seed(0); v = m.sample(1000)
sage: m._forward_scale(v)
-686.8753189365056
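The scaled recursion below renormalizes alpha to sum to 1 at each
step and accumulates the logs of the scaling factors. A minimal
pure-Python sketch (an illustration only, with the same
conventions and hypothetical naming as in ``_forward``)::

    from math import log

    def forward_scale(A, B, pi, obs):
        N = len(pi)
        alpha = [pi[i] * B[i][obs[0]] for i in range(N)]
        s = sum(alpha)
        logprob = log(s)
        alpha = [a / s for a in alpha]
        for y in obs[1:]:
            alpha = [sum(alpha[i] * A[i][j] for i in range(N)) * B[j][y]
                     for j in range(N)]
            s = sum(alpha)          # scaling factor at this step
            logprob += log(s)       # log P(obs) = sum of log scalings
            alpha = [a / s for a in alpha]
        return logprob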
"""
cdef Py_ssize_t i, j, t, T = len(obs)
cdef double log_probability = 0, sum, a
cdef TimeSeries alpha = TimeSeries(self.N), \
alpha2 = TimeSeries(self.N)
sum = 0
for i in range(self.N):
a = self.pi[i] * self.B[i*self.n_out + obs._values[0]]
alpha[i] = a
sum += a
log_probability = log(sum)
alpha.rescale(1/sum)
cdef double s
for t in range(1, T):
sum = 0
for j in range(self.N):
s = 0
for i in range(self.N):
s += alpha._values[i] * self.A._values[i*self.N + j]
a = s * self.B._values[j*self.n_out+obs._values[t]]
alpha2._values[j] = a
sum += a
log_probability += log(sum)
for j in range(self.N):
alpha._values[j] = alpha2._values[j] / sum
return log_probability
def generate_sequence(self, Py_ssize_t length, starting_state=None):
"""
Return a sample of the given length from this HMM.
INPUT:
- ``length`` -- nonnegative integer
- ``starting_state`` -- int (or None); if specified, generate
a sequence using this model starting from the given state,
instead of drawing the starting state from the initial
probabilities.
OUTPUT:
- an IntList or list of emission symbols
- IntList of the actual states the model was in when
emitting the corresponding symbols
EXAMPLES:
In this example, the emission symbols are not set::
sage: set_random_seed(0)
sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1])
sage: a.generate_sequence(5)
([1, 0, 1, 1, 1], [1, 0, 1, 1, 1])
sage: list(a.generate_sequence(1000)[0]).count(0)
90
Here the emission symbols are set::
sage: set_random_seed(0)
sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down'])
sage: a.generate_sequence(5)
(['down', 'up', 'down', 'down', 'down'], [1, 0, 1, 1, 1])
Specify the starting state::
sage: set_random_seed(0); a.generate_sequence(5, starting_state=0)
(['up', 'up', 'down', 'down', 'down'], [0, 0, 1, 1, 1])
"""
if length < 0:
raise ValueError, "length must be nonnegative"
cdef IntList states = IntList(length)
cdef IntList obs = IntList(length)
if length == 0:
if self._emission_symbols is None:
return obs, states
else:
return [], states
cdef Py_ssize_t i, j
cdef randstate rstate = current_randstate()
cdef int q = 0
cdef double r, accum
r = rstate.c_rand_double()
if starting_state is None:
accum = 0
for i in range(self.N):
if r < self.pi._values[i] + accum:
q = i
break
else:
accum += self.pi._values[i]
else:
q = starting_state
if q < 0 or q >= self.N:
raise ValueError, "starting state must be between 0 and %s"%(self.N-1)
states._values[0] = q
obs._values[0] = self._gen_symbol(q, rstate.c_rand_double())
cdef double* row
sig_on()
for i in range(1, length):
accum = 0
row = self.A._values + q*self.N
r = rstate.c_rand_double()
for j in range(self.N):
if r < row[j] + accum:
q = j
break
else:
accum += row[j]
states._values[i] = q
obs._values[i] = self._gen_symbol(q, rstate.c_rand_double())
sig_off()
if self._emission_symbols is None:
return obs, states
else:
return self._IntList_to_emission_symbols(obs), states
cdef int _gen_symbol(self, int q, double r):
"""
Generate a symbol in state q using the randomly chosen
floating point number r, which should be between 0 and 1.
INPUT:
- ``q`` -- a nonnegative integer, which specifies a state
- ``r`` -- a real number between 0 and 1
OUTPUT:
- a nonnegative int
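This implements inverse-CDF sampling: it returns the smallest j
such that B[q,0] + ... + B[q,j] > r, falling back to the last
symbol to guard against accumulated rounding error.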
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.1,0.9],[0.9,0.1]], [.2,.8])
sage: set_random_seed(0)
sage: m.generate_sequence(10) # indirect test
([0, 1, 0, 0, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 1, 1, 0, 0, 0, 0])
"""
cdef Py_ssize_t j
cdef double a, accum = 0
for j in range(self.n_out):
a = self.B._values[q*self.n_out + j]
if r < a + accum:
return j
else:
accum += a
return self.n_out - 1
def viterbi(self, obs, log_scale=True):
"""
Determine "the" hidden sequence of states that is most likely
to produce the given sequence seq of observations, along with
the probability that this hidden sequence actually produced
the observation.
INPUT:
- ``obs`` -- sequence of emitted ints or symbols
- ``log_scale`` -- bool (default: True); whether to compute the
path using logarithms in order to avoid numerical underflow.
OUTPUT:
- ``list`` -- "the" most probable sequence of hidden states, i.e.,
the Viterbi path.
- ``float`` -- log of probability that the observed sequence
was produced by the Viterbi sequence of states.
EXAMPLES::
sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[0.9,0.1],[0.1,0.9]], [0.5,0.5])
sage: a.viterbi([1,0,0,1,0,0,1,1])
([1, 0, 0, 1, ..., 0, 1, 1], -11.06245322477221...)
We predict the state sequence when the emissions are 3/4 and 'abc'::
sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[0.9,0.1],[0.1,0.9]], [0.5,0.5], [3/4, 'abc'])
Note that state 0 is common below, despite the model trying hard to
switch to state 1::
sage: a.viterbi([3/4, 'abc', 'abc'] + [3/4]*10)
([0, 1, 1, 0, 0 ... 0, 0, 0, 0, 0], -25.299405845367794)
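The underlying dynamic programming recursion, in a minimal
log-space pure-Python sketch (an illustration only: ``viterbi_log``
is a hypothetical name, and every entry of ``A``, ``B`` and ``pi``
is assumed positive, since log(0) is undefined)::

    from math import log

    def viterbi_log(A, B, pi, obs):
        N = len(pi)
        # delta[j] = best log probability of any state path ending in j
        delta = [log(pi[i]) + log(B[i][obs[0]]) for i in range(N)]
        psi = []  # psi[t][j] = best predecessor of state j at time t+1
        for y in obs[1:]:
            step = [max((delta[i] + log(A[i][j]), i) for i in range(N))
                    for j in range(N)]
            psi.append([i for (v, i) in step])
            delta = [v + log(B[j][y]) for (j, (v, i)) in enumerate(step)]
        logprob = max(delta)
        q = delta.index(logprob)
        path = [q]
        for back in reversed(psi):  # backtrack through the psi table
            q = back[q]
            path.append(q)
        path.reverse()
        return path, logprob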
"""
if self._emission_symbols is not None:
obs = self._emission_symbols_to_IntList(obs)
elif not isinstance(obs, IntList):
obs = IntList(obs)
if log_scale:
return self._viterbi_scale(obs)
else:
return self._viterbi(obs)
cpdef _viterbi(self, IntList obs):
"""
Used internally to compute the Viterbi path, without
rescaling. This can be useful for short sequences.
INPUT:
- ``obs`` -- IntList
OUTPUT:
- IntList (most likely state sequence)
- log of probability that the observed sequence was
produced by the Viterbi sequence of states.
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.9,0.1]], [[1,0],[0,1]], [.2,.8])
sage: m._viterbi(stats.IntList([1]*5))
([1, 1, 1, 1, 1], -9.433483923290392)
sage: m._viterbi(stats.IntList([0]*5))
([0, 0, 0, 0, 0], -10.819778284410283)
For long sequences, the unscaled probabilities underflow to 0::
sage: m._viterbi(stats.IntList([0]*1000))
([0, 0, 0, 0, 0 ... 0, 0, 0, 0, 0], -inf)
"""
cdef Py_ssize_t t, T = obs._length
cdef IntList state_sequence = IntList(T)
if T == 0:
return state_sequence, 0.0
cdef int i, j, N = self.N
cdef TimeSeries delta = TimeSeries(N), delta_prev = TimeSeries(N)
cdef IntList psi = IntList(N * T)
for i in range(N):
delta._values[i] = self.pi._values[i] * self.B._values[self.n_out*i + obs._values[0]]
cdef double mx, tmp
cdef int index
for t in range(1, T):
delta_prev, delta = delta, delta_prev
for j in range(N):
mx = -1
index = -1
for i in range(N):
tmp = delta_prev._values[i]*self.A._values[i*N+j]
if tmp > mx:
mx = tmp
index = i
delta._values[j] = mx * self.B._values[self.n_out*j + obs._values[t]]
psi._values[N*t + j] = index
mx, index = delta.max(index=True)
state_sequence._values[T-1] = index
t = T-2
while t >= 0:
state_sequence._values[t] = psi._values[N*(t+1) + state_sequence._values[t+1]]
t -= 1
return state_sequence, log(mx)
cpdef _viterbi_scale(self, IntList obs):
"""
Used internally to compute the Viterbi path with rescaling.
INPUT:
- obs -- IntList
OUTPUT:
- IntList (most likely state sequence)
- log of probability that the observed sequence was
produced by the Viterbi sequence of states.
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.9,0.1]], [[.5,.5],[0,1]], [.2,.8])
sage: m._viterbi_scale(stats.IntList([1]*10))
([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], -4.637124095034373)
Long sequences do not cause underflow::
sage: m._viterbi_scale(stats.IntList([1]*1000))
([1, 0, 1, 0, 1 ... 0, 1, 0, 1, 0], -452.05188897345...)
"""
cdef Py_ssize_t t, T = obs._length
cdef IntList state_sequence = IntList(T)
if T == 0:
return state_sequence, 0.0
cdef int i, j, N = self.N
cdef TimeSeries delta = TimeSeries(N), delta_prev = TimeSeries(N)
cdef IntList psi = IntList(N * T)
cdef TimeSeries A = self.A.log()
cdef TimeSeries B = self.B.log()
cdef TimeSeries pi = self.pi.log()
for i in range(N):
delta._values[i] = pi._values[i] + B._values[self.n_out*i + obs._values[0]]
cdef double mx, tmp, minus_inf = float('-inf')
cdef int index
for t in range(1, T):
delta_prev, delta = delta, delta_prev
for j in range(N):
mx = minus_inf
index = -1
for i in range(N):
tmp = delta_prev._values[i] + A._values[i*N+j]
if tmp > mx:
mx = tmp
index = i
delta._values[j] = mx + B._values[self.n_out*j + obs._values[t]]
psi._values[N*t + j] = index
mx, index = delta.max(index=True)
state_sequence._values[T-1] = index
t = T-2
while t >= 0:
state_sequence._values[t] = psi._values[N*(t+1) + state_sequence._values[t+1]]
t -= 1
return state_sequence, mx
cdef TimeSeries _backward_scale_all(self, IntList obs, TimeSeries scale):
r"""
Return the scaled matrix of values `\beta_t(i)` that appear in
the backward algorithm. This function is used internally
by the Baum-Welch algorithm.
The matrix is stored as a TimeSeries T, such that
`\beta_t(i) = T[t*N + i]` where N is the number of states of
the Hidden Markov Model.
The quantity beta_t(i) is the probability of observing the
sequence obs[t+1:] assuming that the model is in state i at
time t.
INPUT:
- ``obs`` -- IntList
- ``scale`` -- series that is *changed* in place, so that
after calling this function, scale[t] is the value used
to scale each of the `\beta_t(i)`.
OUTPUT:
- a TimeSeries of values beta_t(i).
- the input object scale is modified
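The recursion computed below is `\beta_{T-1}(i) = 1/scale[T-1]`
and `\beta_t(i) = (\sum_j A_{i,j} B_{j,o_{t+1}} \beta_{t+1}(j))/scale[t]`,
where `o_{t+1}` denotes obs[t+1].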
"""
cdef Py_ssize_t t, T = obs._length
cdef int N = self.N, i, j
cdef double s
cdef TimeSeries beta = TimeSeries(N*T, initialize=False)
for i in range(N):
beta._values[(T-1)*N + i] = 1/scale._values[T-1]
t = T-2
while t >= 0:
for i in range(N):
s = 0
for j in range(N):
s += self.A._values[i*N+j] * \
self.B._values[j*self.n_out+obs._values[t+1]] * beta._values[(t+1)*N+j]
beta._values[t*N + i] = s/scale._values[t]
t -= 1
return beta
cdef _forward_scale_all(self, IntList obs):
"""
Return scaled values alpha_t(i), the sequence of scalings, and
the log probability.
INPUT:
- ``obs`` -- IntList
OUTPUT:
- TimeSeries alpha with alpha_t(i) = alpha[t*N + i]
- TimeSeries scale with scale[t] the scaling at step t
- float -- log_probability of the observation sequence
being produced by the model.
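Since scale[t] is the sum of the unscaled alpha values at time t,
the returned log probability equals the sum over t of log(scale[t]).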
"""
cdef Py_ssize_t i, j, t, T = len(obs)
cdef int N = self.N
cdef double log_probability = 0, sum, a
cdef TimeSeries alpha = TimeSeries(N*T, initialize=False)
cdef TimeSeries scale = TimeSeries(T, initialize=False)
sum = 0
for i in range(self.N):
a = self.pi._values[i] * self.B._values[i*self.n_out + obs._values[0]]
alpha._values[0*N + i] = a
sum += a
scale._values[0] = sum
log_probability = log(sum)
for i in range(self.N):
alpha._values[0*N + i] /= sum
cdef double s
for t in range(1,T):
sum = 0
for j in range(N):
s = 0
for i in range(N):
s += alpha._values[(t-1)*N + i] * self.A._values[i*N + j]
a = s * self.B._values[j*self.n_out + obs._values[t]]
alpha._values[t*N + j] = a
sum += a
log_probability += log(sum)
scale._values[t] = sum
for j in range(self.N):
alpha._values[t*N + j] /= sum
return alpha, scale, log_probability
cdef TimeSeries _baum_welch_xi(self, TimeSeries alpha, TimeSeries beta, IntList obs):
"""
Used internally to compute the scaled quantity xi_t(i,j)
appearing in the Baum-Welch reestimation algorithm.
INPUT:
- ``alpha`` -- TimeSeries as output by the scaled forward algorithm
- ``beta`` -- TimeSeries as output by the scaled backward algorithm
- ``obs`` -- IntList of observations
OUTPUT:
- TimeSeries xi such that xi[t*N*N + i*N + j] = xi_t(i,j).
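Here xi_t(i,j) = alpha_t(i) * A[i,j] * B[j,obs[t+1]] * beta_{t+1}(j),
normalized so that the entries for each fixed t < T-1 sum to 1.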
"""
cdef int i, j, N = self.N
cdef double sum
cdef Py_ssize_t t, T = alpha._length//N
cdef TimeSeries xi = TimeSeries(T*N*N, initialize=False)
sig_on()
for t in range(T-1):
sum = 0.0
for i in range(N):
for j in range(N):
xi._values[t*N*N + i*N + j] = alpha._values[t*N + i]*beta._values[(t+1)*N + j]*\
self.A._values[i*N + j] * self.B._values[j*self.n_out + obs._values[t+1]]
sum += xi._values[t*N*N + i*N + j]
for i in range(N):
for j in range(N):
xi._values[t*N*N + i*N + j] /= sum
sig_off()
return xi
def baum_welch(self, obs, int max_iter=100, double log_likelihood_cutoff=1e-4, bint fix_emissions=False):
"""
Given an observation sequence obs, improve this HMM using the
Baum-Welch algorithm to increase the probability of observing obs.
INPUT:
- ``obs`` -- list of emissions
- ``max_iter`` -- integer (default: 100) maximum number
of Baum-Welch steps to take
- ``log_likelihood_cutoff`` -- nonnegative float (default: 1e-4);
the minimal improvement in log likelihood over the
previous iteration required to continue iterating.
- ``fix_emissions`` -- bool (default: False); if True, do not
change emissions when updating
OUTPUT:
- changes the model in place, and returns the log
likelihood and number of iterations.
EXAMPLES::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.9,0.1]], [[.5,.5],[0,1]], [.2,.8])
sage: m.baum_welch([1,0]*20, log_likelihood_cutoff=0)
(0.0, 4)
sage: m
Discrete Hidden Markov Model with 2 States and 2 Emissions
Transition matrix:
[1.35152697077e-51 1.0]
[ 1.0 0.0]
Emission matrix:
[ 1.0 6.46253713885e-52]
[ 0.0 1.0]
Initial probabilities: [0.0000, 1.0000]
The following illustrates how Baum-Welch is only a local
optimizer, i.e., the above model is far more likely to produce
the sequence [1,0]*20 than the one we get below::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[.5,.5],[.5,.5]], [.5,.5])
sage: m.baum_welch([1,0]*20, log_likelihood_cutoff=0)
(-27.725887222397784, 1)
sage: m
Discrete Hidden Markov Model with 2 States and 2 Emissions
Transition matrix:
[0.5 0.5]
[0.5 0.5]
Emission matrix:
[0.5 0.5]
[0.5 0.5]
Initial probabilities: [0.5000, 0.5000]
We illustrate fixing emissions::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.9,0.1]], [[.5,.5],[.2,.8]], [.2,.8])
sage: set_random_seed(0); v = m.sample(100)
sage: m.baum_welch(v,fix_emissions=True)
(-66.98630856918774, 100)
sage: m.emission_matrix()
[0.5 0.5]
[0.2 0.8]
sage: m = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.9,0.1]], [[.5,.5],[.2,.8]], [.2,.8])
sage: m.baum_welch(v)
(-66.7823606592935..., 100)
sage: m.emission_matrix()
[0.530308574863 0.469691425137]
[0.290977555017 0.709022444983]
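The reestimation formulas implemented below are the standard ones:
pi[i] is replaced by gamma_0(i); A[i,j] by the sum of xi_t(i,j)
over t = 0,...,T-2 divided by the sum of gamma_t(i) over the same
range; and, unless fix_emissions is True, B[i,k] by the sum of
gamma_t(i) over those t with obs[t] == k divided by the sum of
gamma_t(i) over all t.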
"""
if self._emission_symbols is not None:
obs = self._emission_symbols_to_IntList(obs)
elif not isinstance(obs, IntList):
obs = IntList(obs)
cdef IntList _obs = obs
cdef TimeSeries alpha, beta, scale, gamma, xi
cdef double log_probability, log_probability_prev, delta
cdef int i, j, k, N, n_iterations
cdef Py_ssize_t t, T
cdef double denominator_A, numerator_A, denominator_B, numerator_B
alpha, scale, log_probability = self._forward_scale_all(_obs)
beta = self._backward_scale_all(_obs, scale)
gamma = self._baum_welch_gamma(alpha, beta)
xi = self._baum_welch_xi(alpha, beta, _obs)
log_probability_prev = log_probability
N = self.N
n_iterations = 0
T = len(_obs)
while True:
for i in range(N):
self.pi._values[i] = gamma._values[0*N+i]
for i in range(N):
denominator_A = 0.0
for t in range(T-1):
denominator_A += gamma._values[t*N+i]
for j in range(N):
numerator_A = 0.0
for t in range(T-1):
numerator_A += xi._values[t*N*N+i*N+j]
self.A._values[i*N+j] = numerator_A / denominator_A
if not fix_emissions:
denominator_B = denominator_A + gamma._values[(T-1)*N + i]
for k in range(self.n_out):
numerator_B = 0.0
for t in range(T):
if _obs._values[t] == k:
numerator_B += gamma._values[t*N + i]
self.B._values[i*self.n_out + k] = numerator_B / denominator_B
alpha, scale, log_probability = self._forward_scale_all(_obs)
beta = self._backward_scale_all(_obs, scale)
gamma = self._baum_welch_gamma(alpha, beta)
xi = self._baum_welch_xi(alpha, beta, _obs)
delta = log_probability - log_probability_prev
log_probability_prev = log_probability
n_iterations += 1
if delta <= log_likelihood_cutoff or n_iterations >= max_iter:
break
return log_probability, n_iterations
def unpickle_discrete_hmm_v0(A, B, pi, emission_symbols, name):
"""
TESTS::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[0.5,0.5]], [1,0])
sage: sage.stats.hmm.hmm.unpickle_discrete_hmm_v0(m.transition_matrix(), m.emission_matrix(), m.initial_probabilities(), ['a','b'], 'test model')
Discrete Hidden Markov Model with 2 States and 2 Emissions...
"""
return DiscreteHiddenMarkovModel(A,B,pi,emission_symbols,normalize=False)
def unpickle_discrete_hmm_v1(A, B, pi, n_out, emission_symbols, emission_symbols_dict):
"""
TESTS::
sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[0.5,0.5]], [1,0],['a','b'])
sage: loads(dumps(m)) == m # indirect test
True
"""
cdef DiscreteHiddenMarkovModel m = PY_NEW(DiscreteHiddenMarkovModel)
m.A = A
m.B = B
m.pi = pi
m.n_out = n_out
m._emission_symbols = emission_symbols
m._emission_symbols_dict = emission_symbols_dict
return m