Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sagesmc
Path: blob/master/src/sage/stats/hmm/chmm.pyx
8818 views
1
"""
2
Continuous Emission Hidden Markov Models
3
4
AUTHOR:
5
6
- William Stein, 2010-03
7
"""
8
9
#############################################################################
10
# Copyright (C) 2010 William Stein <[email protected]>
11
# Distributed under the terms of the GNU General Public License (GPL)
12
# The full text of the GPL is available at:
13
# http://www.gnu.org/licenses/
14
#############################################################################
15
16
include "sage/ext/stdsage.pxi"
17
include "sage/ext/cdefs.pxi"
18
include "sage/ext/interrupt.pxi"
19
20
cdef extern from "math.h":
21
double log(double)
22
double sqrt(double)
23
double exp(double)
24
int isnormal(double)
25
int isfinite(double)
26
27
import math
28
29
from sage.misc.flatten import flatten
30
from sage.matrix.matrix import is_Matrix
31
32
cdef double sqrt2pi = sqrt(2*math.pi)
33
34
from sage.finance.time_series cimport TimeSeries
35
from sage.stats.intlist cimport IntList
36
37
from hmm cimport HiddenMarkovModel
38
from util cimport HMM_Util
39
from distributions cimport GaussianMixtureDistribution
40
41
cdef HMM_Util util = HMM_Util()
42
43
from sage.misc.randstate cimport current_randstate, randstate
44
45
46
# TODO: DELETE THIS FUNCTION WHEN MOVE Gaussian stuff to distributions.pyx!!! (next version)
cdef double random_normal(double mean, double std, randstate rstate):
    """
    Return a number chosen randomly with given mean and standard deviation.

    INPUT:

    - ``mean`` -- double
    - ``std`` -- double, standard deviation
    - ``rstate`` -- a randstate object

    OUTPUT:

    - a double
    """
    # Ported from http://users.tkk.fi/~nbeijar/soft/terrain/source_o2/boxmuller.c
    # This is the polar (Marsaglia) form of the Box-Muller algorithm.
    # Client code can get the current random state from:
    #   cdef randstate rstate = current_randstate()

    # Rejection-sample a point strictly inside the unit disc, excluding
    # the origin: accepting w == 0 (possible when both uniforms hit 0.5
    # exactly) would make log(w) = -inf and divide by zero below.
    cdef double x1, x2, w, y1
    while True:
        x1 = 2*rstate.c_rand_double() - 1
        x2 = 2*rstate.c_rand_double() - 1
        w = x1*x1 + x2*x2
        if 0 < w < 1: break
    # Transform the accepted point into a standard normal deviate.
    w = sqrt( (-2*log(w))/w )
    y1 = x1 * w
    return mean + y1*std
cdef class GaussianHiddenMarkovModel(HiddenMarkovModel):
77
"""
78
GaussianHiddenMarkovModel(A, B, pi)
79
80
Gaussian emissions Hidden Markov Model.
81
82
INPUT:
83
84
- ``A`` -- matrix; the N x N transition matrix
85
- ``B`` -- list of pairs (mu,sigma) that define the distributions
86
- ``pi`` -- initial state probabilities
87
- ``normalize`` --bool (default: True)
88
89
EXAMPLES:
90
91
We illustrate the primary functions with an example 2-state Gaussian HMM::
92
93
sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,1), (-1,1)], [.5,.5]); m
94
Gaussian Hidden Markov Model with 2 States
95
Transition matrix:
96
[0.1 0.9]
97
[0.5 0.5]
98
Emission parameters:
99
[(1.0, 1.0), (-1.0, 1.0)]
100
Initial probabilities: [0.5000, 0.5000]
101
102
We query the defining transition matrix, emission parameters, and
103
initial state probabilities::
104
105
sage: m.transition_matrix()
106
[0.1 0.9]
107
[0.5 0.5]
108
sage: m.emission_parameters()
109
[(1.0, 1.0), (-1.0, 1.0)]
110
sage: m.initial_probabilities()
111
[0.5000, 0.5000]
112
113
We obtain a sample sequence with 10 entries in it, and compute the
114
logarithm of the probability of obtaining his sequence, given the
115
model::
116
117
sage: obs = m.sample(10); obs
118
[-1.6835, 0.0635, -2.1688, 0.3043, -0.3188, -0.7835, 1.0398, -1.3558, 1.0882, 0.4050]
119
sage: m.log_likelihood(obs)
120
-15.2262338077988...
121
122
We compute the Viterbi path, and probability that the given path
123
of states produced obs::
124
125
sage: m.viterbi(obs)
126
([1, 0, 1, 0, 1, 1, 0, 1, 0, 1], -16.67738270170788)
127
128
We use the Baum-Welch iterative algorithm to find another model
129
for which our observation sequence is more likely::
130
131
sage: m.baum_welch(obs)
132
(-10.6103334957397..., 14)
133
sage: m.log_likelihood(obs)
134
-10.6103334957397...
135
136
Notice that running Baum-Welch changed our model::
137
138
sage: m
139
Gaussian Hidden Markov Model with 2 States
140
Transition matrix:
141
[ 0.415498136619 0.584501863381]
142
[ 0.999999317425 6.82574625899e-07]
143
Emission parameters:
144
[(0.417888242712, 0.517310966436), (-1.50252086313, 0.508551283606)]
145
Initial probabilities: [0.0000, 1.0000]
146
"""
147
cdef TimeSeries B, prob
148
cdef int n_out
149
150
    def __init__(self, A, B, pi, bint normalize=True):
        """
        Create a Gaussian emissions HMM with transition probability
        matrix A, normal emissions given by B, and initial state
        probability distribution pi.

        INPUT::

        - A -- a list of lists or a square N x N matrix, whose
          (i,j) entry gives the probability of transitioning from
          state i to state j.

        - B -- a list of N pairs (mu,std), where if B[i]=(mu,std),
          then the probability distribution associated with state i
          normal with mean mu and standard deviation std.

        - pi -- the probabilities of starting in each initial
          state, i.e,. pi[i] is the probability of starting in
          state i.

        - normalize --bool (default: True); if given, input is
          normalized to define valid probability distributions,
          e.g., the entries of A are made nonnegative and the rows
          sum to 1.

        EXAMPLES::

            sage: hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,1), (-1,1)], [.5,.5])
            Gaussian Hidden Markov Model with 2 States
            Transition matrix:
            [0.1 0.9]
            [0.5 0.5]
            Emission parameters:
            [(1.0, 1.0), (-1.0, 1.0)]
            Initial probabilities: [0.5000, 0.5000]

        We input a model in which both A and pi have to be
        renormalized to define valid probability distributions::

            sage: hmm.GaussianHiddenMarkovModel([[-1,.7],[.3,.4]], [(1,1), (-1,1)], [-1,.3])
            Gaussian Hidden Markov Model with 2 States
            Transition matrix:
            [           0.0            1.0]
            [0.428571428571 0.571428571429]
            Emission parameters:
            [(1.0, 1.0), (-1.0, 1.0)]
            Initial probabilities: [0.0000, 1.0000]

        Bad things can happen::

            sage: hmm.GaussianHiddenMarkovModel([[-1,.7],[.3,.4]], [(1,1), (-1,1)], [-1,.3], normalize=False)
            Gaussian Hidden Markov Model with 2 States
            Transition matrix:
            [-1.0  0.7]
            [ 0.3  0.4]
            ...
        """
        # Normalize/convert pi and A into flat TimeSeries storage; N is
        # inferred from the length of pi.
        self.pi = util.initial_probs_to_TimeSeries(pi, normalize)
        self.N = len(self.pi)
        self.A = util.state_matrix_to_TimeSeries(A, self.N, normalize)

        # B should be a matrix of N rows, with column 0 the mean and 1
        # the standard deviation.  Either way it is flattened into a
        # length-2N TimeSeries: [mu_0, std_0, mu_1, std_1, ...].
        if is_Matrix(B):
            B = B.list()
        else:
            B = flatten(B)
        self.B = TimeSeries(B)
        # Precompute the cached normalization/exponent coefficients used
        # by probability_of().
        self.probability_init()
    def __cmp__(self, other):
        """
        Compare self and other, which must both be GaussianHiddenMarkovModel's.

        Comparison is done on the full pickling data (A, B, pi, prob,
        n_out), so two models compare equal exactly when all their
        defining data agree.

        EXAMPLES::

            sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
            sage: n = hmm.GaussianHiddenMarkovModel([[1]], [(1,1)], [1])
            sage: m < n
            True
            sage: m == m
            True
            sage: n > m
            True
            sage: n < m
            False
        """
        if not isinstance(other, GaussianHiddenMarkovModel):
            raise ValueError
        # __reduce__()[1] is the tuple of defining data; compare those
        # tuples lexicographically (Python 2 cmp).
        return cmp(self.__reduce__()[1], other.__reduce__()[1])
def __getitem__(self, Py_ssize_t i):
243
"""
244
Return the mean and standard distribution for the i-th state.
245
246
INPUT:
247
248
- i -- integer
249
250
OUTPUT:
251
252
- 2 floats
253
254
EXAMPLES::
255
256
sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-2,.3)], [.5,.5])
257
sage: m[0]
258
(1.0, 0.5)
259
sage: m[1]
260
(-2.0, 0.3)
261
sage: m[-1]
262
(-2.0, 0.3)
263
sage: m[3]
264
Traceback (most recent call last):
265
...
266
IndexError: index out of range
267
sage: m[-3]
268
Traceback (most recent call last):
269
...
270
IndexError: index out of range
271
"""
272
if i < 0:
273
i += self.N
274
if i < 0 or i >= self.N:
275
raise IndexError, 'index out of range'
276
277
# TODO: change to be a normal distribution class (next version)
278
return self.B[2*i], self.B[2*i+1]
279
280
def __reduce__(self):
281
"""
282
Used in pickling.
283
284
EXAMPLES::
285
286
sage: G = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
287
sage: loads(dumps(G)) == G
288
True
289
"""
290
return unpickle_gaussian_hmm_v1, \
291
(self.A, self.B, self.pi, self.prob, self.n_out)
292
293
def emission_parameters(self):
294
"""
295
Return the parameters that define the normal distributions
296
associated to all of the states.
297
298
OUTPUT:
299
300
- a list B of pairs B[i] = (mu, std), such that the
301
distribution associated to state i is normal with mean
302
mu and standard deviation std.
303
304
EXAMPLES::
305
306
sage: hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9]).emission_parameters()
307
[(1.0, 0.5), (-1.0, 3.0)]
308
"""
309
cdef Py_ssize_t i
310
from sage.rings.all import RDF
311
return [(RDF(self.B[2*i]),RDF(self.B[2*i+1])) for i in range(self.N)]
312
313
def __repr__(self):
314
"""
315
Return string representation.
316
317
EXAMPLES::
318
319
sage: hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9]).__repr__()
320
'Gaussian Hidden Markov Model with 2 States\nTransition matrix:\n[0.1 0.9]\n[0.5 0.5]\nEmission parameters:\n[(1.0, 0.5), (-1.0, 3.0)]\nInitial probabilities: [0.1000, 0.9000]'
321
"""
322
s = "Gaussian Hidden Markov Model with %s States"%self.N
323
s += '\nTransition matrix:\n%s'%self.transition_matrix()
324
s += '\nEmission parameters:\n%s'%self.emission_parameters()
325
s += '\nInitial probabilities: %s'%self.initial_probabilities()
326
return s
327
328
329
def generate_sequence(self, Py_ssize_t length, starting_state=None):
330
"""
331
Return a sample of the given length from this HMM.
332
333
INPUT:
334
335
- length -- positive integer
336
- starting_state -- int (or None); if specified then generate
337
a sequence using this model starting with the given state
338
instead of the initial probabilities to determine the
339
starting state.
340
341
OUTPUT:
342
343
- an IntList or list of emission symbols
344
- TimeSeries of emissions
345
346
EXAMPLES::
347
348
sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
349
sage: m.generate_sequence(5)
350
([-3.0505, 0.5317, -4.5065, 0.6521, 1.0435], [1, 0, 1, 0, 1])
351
sage: m.generate_sequence(0)
352
([], [])
353
sage: m.generate_sequence(-1)
354
Traceback (most recent call last):
355
...
356
ValueError: length must be nonnegative
357
358
Example in which the starting state is 0 (see trac 11452)::
359
360
sage: set_random_seed(23); m.generate_sequence(2)
361
([0.6501, -2.0151], [0, 1])
362
363
Force a starting state of 1 even though as we saw above it would be 0::
364
365
sage: set_random_seed(23); m.generate_sequence(2, starting_state=1)
366
([-3.1491, -1.0244], [1, 1])
367
368
Verify numerically that the starting state is 0 with probability about 0.1::
369
370
sage: set_random_seed(0)
371
sage: v = [m.generate_sequence(1)[1][0] for i in range(10^5)]
372
sage: 1.0 * v.count(int(0)) / len(v)
373
0.0998200000000000
374
"""
375
if length < 0:
376
raise ValueError, "length must be nonnegative"
377
378
# Create Integer lists for states and observations
379
cdef IntList states = IntList(length)
380
cdef TimeSeries obs = TimeSeries(length)
381
if length == 0:
382
return states, obs
383
384
# Setup variables, including random state.
385
cdef Py_ssize_t i, j
386
cdef randstate rstate = current_randstate()
387
cdef int q = 0
388
cdef double r, accum
389
390
# Choose the starting state.
391
# See the remark in hmm.pyx about how this should get
392
# replaced by some general fast discrete distribution code.
393
if starting_state is None:
394
r = rstate.c_rand_double()
395
accum = 0
396
for i in range(self.N):
397
if r < self.pi._values[i] + accum:
398
q = i
399
break
400
else:
401
accum += self.pi._values[i]
402
else:
403
q = starting_state
404
if q < 0 or q>= self.N:
405
raise ValueError, "starting state must be between 0 and %s"%(self.N-1)
406
407
states._values[0] = q
408
obs._values[0] = self.random_sample(q, rstate)
409
410
cdef double* row
411
cdef int O
412
sig_on()
413
for i in range(1, length):
414
accum = 0
415
row = self.A._values + q*self.N
416
r = rstate.c_rand_double()
417
for j in range(self.N):
418
if r < row[j] + accum:
419
q = j
420
break
421
else:
422
accum += row[j]
423
states._values[i] = q
424
obs._values[i] = self.random_sample(q, rstate)
425
sig_off()
426
427
return obs, states
428
429
    cdef probability_init(self):
        """
        Used internally to compute caching information that makes
        certain computations in the Baum-Welch algorithm faster. This
        function has no input or output.
        """
        # For each state i, cache the two coefficients of the normal pdf
        # so probability_of() is a single multiply and exp():
        #   prob[2i]   = 1/(sqrt(2*pi)*std)   -- normalization constant
        #   prob[2i+1] = -1/(2*std^2)         -- exponent coefficient
        self.prob = TimeSeries(2*self.N)
        cdef int i
        for i in range(self.N):
            self.prob[2*i] = 1.0/(sqrt2pi*self.B[2*i+1])
            self.prob[2*i+1] = -1.0/(2*self.B[2*i+1]*self.B[2*i+1])
    cdef double random_sample(self, int state, randstate rstate):
        """
        Return a random sample from the normal distribution associated
        to the given state.

        This is only used internally, and no bounds or other error
        checking is done, so calling this improperly can lead to seg
        faults.

        INPUT:

        - state -- integer
        - rstate -- randstate instance

        OUTPUT:

        - double
        """
        # B stores (mean, std) pairs flattened per state.
        return random_normal(self.B._values[state*2], self.B._values[state*2+1], rstate)
    cdef double probability_of(self, int state, double observation):
        """
        Return a useful continuous analogue of "the probability b_j(o)"
        of seeing the given observation given that we're in the given
        state j (=state).

        The distribution is a normal distribution, and we're asking
        about the probability of a particular point being observed;
        the probability of a particular point is 0, which is not
        useful. Thus we instead consider the limit p = prob([o,o+d])/d
        as d goes to 0. There is a simple closed form formula for p,
        derived in the source code. Note that p can be bigger than 1;
        for example, if we set observation=mean in the closed formula
        we get p=1/(sqrt(2*pi)*std), so p>1 when std<1/sqrt(2*pi).

        INPUT:

        - state -- integer
        - observation -- double

        OUTPUT:

        - double
        """
        # The code below is an optimized version of the following code:
        #     cdef double mean = self.B._values[2*state], \
        #                 std = self.B._values[2*state+1]
        #     return 1/(sqrt2pi*std) * \
        #            exp(-(observation-mean)*(observation-mean)/(2*std*std))
        #
        # Here is how to use Sage to verify that the above formula computes
        # the limit claimed above:
        #
        #     var('x,d,obs,mean,std')
        #     n = 1/sqrt(2*pi*std^2) * exp(-(x-mean)^2/(2*std^2))
        #     assume(std>0); assume(d>0)
        #     m = n.integrate(x,obs,obs+d)/d
        #     p = SR(m.limit(d=0).simplify_full())
        #     q = 1/(sqrt(2*pi)*std) * exp(-(obs-mean)*(obs-mean)/(2*std*std))
        #     bool(p==q)  # outputs True

        # prob[2*state] and prob[2*state+1] are the coefficients cached
        # by probability_init(); see there for their definitions.
        cdef double x = observation - self.B._values[2*state]  # observation - mean
        return self.prob._values[2*state] * exp(x*x*self.prob._values[2*state+1])
def log_likelihood(self, obs):
506
"""
507
Return the logarithm of a continuous analogue of the
508
probability that this model produced the given observation
509
sequence.
510
511
Note that the "continuous analogue of the probability" above can
512
be bigger than 1, hence the logarithm can be positive.
513
514
INPUT:
515
516
- obs -- sequence of observations
517
518
OUTPUT:
519
520
- float
521
522
EXAMPLES::
523
524
sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
525
sage: m.log_likelihood([1,1,1])
526
-4.297880766072486
527
sage: set_random_seed(0); s = m.sample(20)
528
sage: m.log_likelihood(s)
529
-40.115714129484...
530
"""
531
if len(obs) == 0:
532
return 1.0
533
if not isinstance(obs, TimeSeries):
534
obs = TimeSeries(obs)
535
return self._forward_scale(obs)
536
537
    def _forward_scale(self, TimeSeries obs):
        """
        Memory-efficient implementation of the forward algorithm (with scaling).

        Only the current and previous alpha vectors are kept, so memory
        is O(N) rather than O(N*T).

        INPUT:

        - obs -- an integer list of observation states.

        OUTPUT:

        - float -- the log of the probability that the model
          produced this sequence

        EXAMPLES::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
            sage: m._forward_scale(stats.TimeSeries([1,-1,-1,1]))
            -7.641988207069133
        """
        cdef Py_ssize_t i, j, t, T = len(obs)

        # The running sum of the log probabilities
        cdef double log_probability = 0, sum, a

        cdef TimeSeries alpha = TimeSeries(self.N), \
             alpha2 = TimeSeries(self.N)

        # Initialization: alpha_0(i) = pi_i * b_i(obs[0])
        sum = 0
        for i in range(self.N):
            a = self.pi[i] * self.probability_of(i, obs._values[0])
            alpha[i] = a
            sum += a

        # Accumulate the log of each scaling factor; rescaling keeps the
        # alpha values from underflowing for long sequences.
        log_probability = log(sum)
        alpha.rescale(1/sum)

        # Induction: alpha_t(j) = (sum_i alpha_{t-1}(i) a_{ij}) * b_j(obs[t]),
        # rescaled at every step.
        cdef double s
        for t in range(1, T):
            sum = 0
            for j in range(self.N):
                s = 0
                for i in range(self.N):
                    s += alpha._values[i] * self.A._values[i*self.N + j]
                a = s * self.probability_of(j, obs._values[t])
                alpha2._values[j] = a
                sum += a

            log_probability += log(sum)
            for j in range(self.N):
                alpha._values[j] = alpha2._values[j] / sum

        # Termination: the log probability is the sum of the log scalings.
        return log_probability
    def viterbi(self, obs):
        """
        Determine "the" hidden sequence of states that is most likely
        to produce the given sequence seq of observations, along with
        the probability that this hidden sequence actually produced
        the observation.

        INPUT:

        - seq -- sequence of emitted ints or symbols

        OUTPUT:

        - list -- "the" most probable sequence of hidden states, i.e.,
          the Viterbi path.

        - float -- log of probability that the observed sequence
          was produced by the Viterbi sequence of states.

        EXAMPLES:

        We find the optimal state sequence for a given model::

            sage: m = hmm.GaussianHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [(0,1),(10,1)], [0.5,0.5])
            sage: m.viterbi([0,1,10,10,1])
            ([0, 0, 1, 1, 0], -9.0604285688230...)

        Another example in which the most likely states change based
        on the last observation::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
            sage: m.viterbi([-2,-1,.1,0.1])
            ([1, 1, 0, 1], -9.61823698847639...)
            sage: m.viterbi([-2,-1,.1,0.3])
            ([1, 1, 1, 0], -9.566023653378513)
        """
        cdef TimeSeries _obs
        if not isinstance(obs, TimeSeries):
            _obs = TimeSeries(obs)
        else:
            _obs = obs

        # The algorithm is the same as _viterbi above, except
        # we take the logarithms of everything first, and add
        # instead of multiply.
        cdef Py_ssize_t t, T = _obs._length
        cdef IntList state_sequence = IntList(T)
        if T == 0:
            return state_sequence, 0.0

        cdef int i, j, N = self.N

        # delta[i] is the maximum of the probabilities over all
        # paths ending in state i.
        cdef TimeSeries delta = TimeSeries(N), delta_prev = TimeSeries(N)

        # We view psi as an N x T matrix of ints. The quantity
        #     psi[N*t + j]
        # is a most probable hidden state at time t-1, given
        # that j is a most probable state at time j.
        cdef IntList psi = IntList(N * T)  # initialized to 0 by default

        # Log Preprocessing: work with log A and log pi so path scores
        # are sums rather than products (avoids underflow).
        cdef TimeSeries A = self.A.log()
        cdef TimeSeries pi = self.pi.log()

        # Initialization:
        for i in range(N):
            delta._values[i] = pi._values[i] + log(self.probability_of(i, _obs._values[0]))

        # Recursion:
        cdef double mx, tmp, minus_inf = float('-inf')
        cdef int index

        for t in range(1, T):
            # Reuse the two delta buffers by swapping them each step.
            delta_prev, delta = delta, delta_prev
            for j in range(N):
                # Compute delta_t[j] = max_i(delta_{t-1}(i) a_{i,j}) * b_j(_obs[t])
                mx = minus_inf
                index = -1
                for i in range(N):
                    tmp = delta_prev._values[i] + A._values[i*N+j]
                    if tmp > mx:
                        mx = tmp
                        index = i
                delta._values[j] = mx + log(self.probability_of(j, _obs._values[t]))
                psi._values[N*t + j] = index

        # Termination: best final state and its log score.
        mx, index = delta.max(index=True)

        # Path (state sequence) backtracking:
        state_sequence._values[T-1] = index
        t = T-2
        while t >= 0:
            state_sequence._values[t] = psi._values[N*(t+1) + state_sequence._values[t+1]]
            t -= 1

        return state_sequence, mx
    cdef TimeSeries _backward_scale_all(self, TimeSeries obs, TimeSeries scale):
        """
        This function returns the matrix beta_t(i), and is used
        internally as part of the Baum-Welch algorithm.

        The quantity beta_t(i) is the probability of observing the
        sequence obs[t+1:] assuming that the model is in state i at
        time t.

        INPUT:

        - obs -- TimeSeries
        - scale -- TimeSeries

        OUTPUT:

        - TimeSeries beta such that beta_t(i) = beta[t*N + i]
        - scale is also changed by this function

        .. NOTE::

            The scale factors used here must be the ones produced by
            _forward_scale_all on the same observation sequence, so
            that alpha and beta share consistent scaling.
        """
        cdef Py_ssize_t t, T = obs._length
        cdef int N = self.N, i, j
        cdef double s
        cdef TimeSeries beta = TimeSeries(N*T, initialize=False)

        # 1. Initialization: beta_{T-1}(i) = 1, scaled.
        for i in range(N):
            beta._values[(T-1)*N + i] = 1 / scale._values[T-1]

        # 2. Induction (backwards in time):
        #    beta_t(i) = sum_j a_{ij} b_j(obs[t+1]) beta_{t+1}(j), scaled.
        t = T-2
        while t >= 0:
            for i in range(N):
                s = 0
                for j in range(N):
                    s += self.A._values[i*N+j] * \
                         self.probability_of(j, obs._values[t+1]) * beta._values[(t+1)*N+j]
                beta._values[t*N + i] = s/scale._values[t]
            t -= 1
        return beta
    cdef _forward_scale_all(self, TimeSeries obs):
        """
        Return scaled values alpha_t(i), the sequence of scalings, and
        the log probability.

        The quantity alpha_t(i) is the probability of observing the
        sequence obs[:t+1] assuming that the model is in state i at
        time t.

        INPUT:

        - obs -- TimeSeries

        OUTPUT:

        - TimeSeries alpha with alpha_t(i) = alpha[t*N + i]
        - TimeSeries scale with scale[t] the scaling at step t
        - float -- log_probability of the observation sequence
          being produced by the model.
        """
        cdef Py_ssize_t i, j, t, T = len(obs)
        cdef int N = self.N

        # The running sum of the log probabilities
        cdef double log_probability = 0, sum, a

        cdef TimeSeries alpha = TimeSeries(N*T, initialize=False)
        cdef TimeSeries scale = TimeSeries(T, initialize=False)

        # Initialization: alpha_0(i) = pi_i * b_i(obs[0]), then rescale
        # so the row sums to 1 (scale[0] records the factor).
        sum = 0
        for i in range(self.N):
            a = self.pi._values[i] * self.probability_of(i, obs._values[0])
            alpha._values[0*N + i] = a
            sum += a

        scale._values[0] = sum
        log_probability = log(sum)
        for i in range(self.N):
            alpha._values[0*N + i] /= sum

        # Induction
        # The code below is just an optimized version of:
        #     alpha = (alpha * A).pairwise_product(B[O[t+1]])
        #     alpha = alpha.scale(1/alpha.sum())
        # along with keeping track of the log of the scaling factor.
        cdef double s
        for t in range(1,T):
            sum = 0
            for j in range(N):
                s = 0
                for i in range(N):
                    s += alpha._values[(t-1)*N + i] * self.A._values[i*N + j]
                a = s * self.probability_of(j, obs._values[t])
                alpha._values[t*N + j] = a
                sum += a

            log_probability += log(sum)
            scale._values[t] = sum
            for j in range(self.N):
                alpha._values[t*N + j] /= sum

        # Termination
        return alpha, scale, log_probability
    cdef TimeSeries _baum_welch_xi(self, TimeSeries alpha, TimeSeries beta, TimeSeries obs):
        """
        Used internally to compute the scaled quantity xi_t(i,j)
        appearing in the Baum-Welch reestimation algorithm.

        The quantity xi_t(i,j) is the probability of being in state i
        at time t and state j at time t+1, given the observations.

        INPUT:

        - alpha -- TimeSeries as output by the scaled forward algorithm
        - beta -- TimeSeries as output by the scaled backward algorithm
        - obs -- TimeSeries of observations

        OUTPUT:

        - TimeSeries xi such that xi[t*N*N + i*N + j] = xi_t(i,j).
          Only the entries for t in [0, T-2] are filled in; the slice
          for t = T-1 is left uninitialized and unused by the caller.
        """
        cdef int i, j, N = self.N
        cdef double sum
        cdef Py_ssize_t t, T = alpha._length//N
        cdef TimeSeries xi = TimeSeries(T*N*N, initialize=False)
        for t in range(T-1):
            sum = 0.0
            for i in range(N):
                for j in range(N):
                    # Unnormalized xi_t(i,j) = alpha_t(i) a_{ij} b_j(obs[t+1]) beta_{t+1}(j)
                    xi._values[t*N*N+i*N+j] = alpha._values[t*N+i]*beta._values[(t+1)*N+j]*\
                        self.A._values[i*N+j] * self.probability_of(j, obs._values[t+1])
                    sum += xi._values[t*N*N+i*N+j]
            # Normalize so the N x N slice at time t sums to 1.
            for i in range(N):
                for j in range(N):
                    xi._values[t*N*N+i*N+j] /= sum
        return xi
    def baum_welch(self, obs, int max_iter=500, double log_likelihood_cutoff=1e-4,
                   double min_sd=0.01, bint fix_emissions=False, bint v=False):
        """
        Given an observation sequence obs, improve this HMM using the
        Baum-Welch algorithm to increase the probability of observing obs.

        INPUT:

        - obs -- a time series of emissions

        - max_iter -- integer (default: 500) maximum number
          of Baum-Welch steps to take

        - log_likelihood_cutoff -- positive float (default: 1e-4);
          the minimal improvement in likelihood with respect to
          the last iteration required to continue. Relative value
          to log likelihood.

        - min_sd -- positive float (default: 0.01); when
          reestimating, the standard deviation of emissions is not
          allowed to be less than min_sd.

        - fix_emissions -- bool (default: False); if True, do not
          change emissions when updating

        OUTPUT:

        - changes the model in places, and returns the log
          likelihood and number of iterations.

        EXAMPLES::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
            sage: m.log_likelihood([-2,-1,.1,0.1])
            -8.858282215986275
            sage: m.baum_welch([-2,-1,.1,0.1])
            (4.534646052182..., 7)
            sage: m.log_likelihood([-2,-1,.1,0.1])
            4.534646052182...
            sage: m
            Gaussian Hidden Markov Model with 2 States
            Transition matrix:
            [   0.999999999243 7.56983939444e-10]
            [   0.499984627912    0.500015372088]
            Emission parameters:
            [(0.1, 0.01), (-1.49995081476, 0.50007105049)]
            Initial probabilities: [0.0000, 1.0000]

        We illustrate bounding the standard deviation below. Note that above we had
        different emission parameters when the min_sd was the default of 0.01::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
            sage: m.baum_welch([-2,-1,.1,0.1], min_sd=1)
            (-4.07939572755..., 32)
            sage: m.emission_parameters()
            [(-0.2663018798..., 1.0), (-1.99850979..., 1.0)]

        We watch the log likelihoods of the model converge, step by step::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.5,.5]], [(1,.5), (-1,3)], [.1,.9])
            sage: v = m.sample(10)
            sage: stats.TimeSeries([m.baum_welch(v,max_iter=1)[0] for _ in range(len(v))])
            [-20.1167, -17.7611, -16.9814, -16.9364, -16.9314, -16.9309, -16.9309, -16.9309, -16.9309, -16.9309]

        We illustrate fixing emissions::

            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.9,.1]], [(1,2),(-1,.5)], [.3,.7])
            sage: set_random_seed(0); v = m.sample(100)
            sage: m.baum_welch(v,fix_emissions=True)
            (-164.72944548204..., 23)
            sage: m.emission_parameters()
            [(1.0, 2.0), (-1.0, 0.5)]
            sage: m = hmm.GaussianHiddenMarkovModel([[.1,.9],[.9,.1]], [(1,2),(-1,.5)], [.3,.7])
            sage: m.baum_welch(v)
            (-162.854370397998..., 49)
            sage: m.emission_parameters()
            [(1.27224191726, 2.37136875176), (-0.948617467518, 0.576236038512)]
        """
        if not isinstance(obs, TimeSeries):
            obs = TimeSeries(obs)
        cdef TimeSeries _obs = obs
        cdef TimeSeries alpha, beta, scale, gamma, xi
        cdef double log_probability, log_probability0, log_probability_prev, delta
        cdef int i, j, k, N, n_iterations
        cdef Py_ssize_t t, T
        cdef double denominator_A, numerator_A, denominator_B, numerator_mean, numerator_std

        # Initialization: one forward/backward pass to get the scaled
        # alpha/beta matrices and the gamma/xi posteriors.
        alpha, scale, log_probability0 = self._forward_scale_all(_obs)
        if not isfinite(log_probability0):
            return (0.0, 0)
        log_probability = log_probability0
        beta = self._backward_scale_all(_obs, scale)
        gamma = self._baum_welch_gamma(alpha, beta)
        xi = self._baum_welch_xi(alpha, beta, _obs)
        log_probability_prev = log_probability
        N = self.N
        n_iterations = 0
        T = len(_obs)

        # Re-estimation
        while True:
            # Reestimate initial probabilities: pi_i = gamma_0(i).
            for i in range(N):
                if not isfinite(gamma._values[0*N+i]):
                    # Before raising an error, leave self in a valid state.
                    util.normalize_probability_TimeSeries(self.pi, 0, self.pi._length)
                    raise RuntimeError, "impossible to compute gamma during reestimation"
                self.pi._values[i] = gamma._values[0*N+i]

            # Update the probabilities pi to define a valid discrete distribution
            util.normalize_probability_TimeSeries(self.pi, 0, self.pi._length)

            # Reestimate transition matrix and emission probabilities in
            # each state.
            for i in range(N):
                # Compute the updated transition matrix:
                # a_{ij} = (sum_t xi_t(i,j)) / (sum_t gamma_t(i)), t < T-1.
                denominator_A = 0.0
                for t in range(T-1):
                    denominator_A += gamma._values[t*N+i]
                if not isnormal(denominator_A):
                    raise RuntimeError, "unable to re-estimate transition matrix"
                for j in range(N):
                    numerator_A = 0.0
                    for t in range(T-1):
                        numerator_A += xi._values[t*N*N+i*N+j]
                    self.A._values[i*N+j] = numerator_A / denominator_A

                # Rescale the i-th row of the transition matrix to be
                # a valid stochastic matrix:
                util.normalize_probability_TimeSeries(self.A, i*N, (i+1)*N)

                if not fix_emissions:
                    # Emission denominator includes the final time step.
                    denominator_B = denominator_A + gamma._values[(T-1)*N + i]
                    if not isnormal(denominator_B):
                        raise RuntimeError, "unable to re-estimate emission probabilities"

                    numerator_mean = 0.0
                    numerator_std = 0.0
                    # Note: the std numerator deliberately uses the OLD
                    # mean self.B._values[2*i], updated only afterwards.
                    for t in range(T):
                        numerator_mean += gamma._values[t*N + i] * _obs._values[t]
                        numerator_std += gamma._values[t*N + i] * \
                            (_obs._values[t] - self.B._values[2*i])*(_obs._values[t] - self.B._values[2*i])
                    # restimated mean
                    self.B._values[2*i] = numerator_mean / denominator_B
                    # restimated standard deviation, bounded below by min_sd
                    self.B._values[2*i+1] = sqrt(numerator_std / denominator_B)
                    if self.B._values[2*i+1] < min_sd:
                        self.B._values[2*i+1] = min_sd
                    # Refresh the cached pdf coefficients for the new B.
                    self.probability_init()

            n_iterations += 1
            if n_iterations >= max_iter: break

            # Initialization for next iteration
            alpha, scale, log_probability0 = self._forward_scale_all(_obs)

            if not isfinite(log_probability0): break
            log_probability = log_probability0
            beta = self._backward_scale_all(_obs, scale)
            gamma = self._baum_welch_gamma(alpha, beta)
            xi = self._baum_welch_xi(alpha, beta, _obs)

            # Compute the difference between the log probability of
            # two iterations.
            delta = log_probability - log_probability_prev
            log_probability_prev = log_probability

            # If the log probability does not change by more than delta,
            # then terminate
            if delta >= 0 and delta <= log_likelihood_cutoff:
                break

        return log_probability, n_iterations
1005
cdef class GaussianMixtureHiddenMarkovModel(GaussianHiddenMarkovModel):
1006
"""
1007
GaussianMixtureHiddenMarkovModel(A, B, pi)
1008
1009
Gaussian mixture Hidden Markov Model.
1010
1011
INPUT:
1012
1013
- ``A`` -- matrix; the N x N transition matrix
1014
1015
- ``B`` -- list of mixture definitions for each state. Each
1016
state may have a varying number of gaussians with selection
1017
probabilities that sum to 1 and encoded as (p,(mu,sigma))
1018
1019
- ``pi`` -- initial state probabilities
1020
1021
- ``normalize`` --bool (default: True); if given, input is
1022
normalized to define valid probability distributions,
1023
e.g., the entries of A are made nonnegative and the rows
1024
sum to 1, and the probabilities in pi are normalized.
1025
1026
EXAMPLES::
1027
1028
sage: A = [[0.5,0.5],[0.5,0.5]]
1029
sage: B = [[(0.9,(0.0,1.0)), (0.1,(1,10000))],[(1,(1,1)), (0,(0,0.1))]]
1030
sage: hmm.GaussianMixtureHiddenMarkovModel(A, B, [1,0])
1031
Gaussian Mixture Hidden Markov Model with 2 States
1032
Transition matrix:
1033
[0.5 0.5]
1034
[0.5 0.5]
1035
Emission parameters:
1036
[0.9*N(0.0,1.0) + 0.1*N(1.0,10000.0), 1.0*N(1.0,1.0) + 0.0*N(0.0,0.1)]
1037
Initial probabilities: [1.0000, 0.0000]
1038
1039
TESTS:
1040
1041
If a standard deviation is 0, it is normalized to be slightly bigger than 0.::
1042
1043
sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(1,(0,0))]], [1])
1044
Gaussian Mixture Hidden Markov Model with 1 States
1045
Transition matrix:
1046
[1.0]
1047
Emission parameters:
1048
[1.0*N(0.0,1e-08)]
1049
Initial probabilities: [1.0000]
1050
1051
We test that number of emission distributions must be the same as the number of states::
1052
1053
sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [], [1])
1054
Traceback (most recent call last):
1055
...
1056
ValueError: number of GaussianMixtures must be the same as number of entries of pi
1057
1058
sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [[]], [1])
1059
Traceback (most recent call last):
1060
...
1061
ValueError: must specify at least one component of the mixture model
1062
1063
We test that the number of initial probabilities must equal the number of states::
1064
1065
sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [[]], [1,2])
1066
Traceback (most recent call last):
1067
...
1068
ValueError: number of entries of transition matrix A must be the square of the number of entries of pi
1069
"""
1070
1071
cdef object mixture # mixture
1072
1073
def __init__(self, A, B, pi=None, bint normalize=True):
    """
    Construct a Gaussian mixture hidden Markov model.

    EXAMPLES::

        sage: hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        Gaussian Mixture Hidden Markov Model with 2 States
        Transition matrix:
        [0.9 0.1]
        [0.4 0.6]
        Emission parameters:
        [0.4*N(0.0,1.0) + 0.6*N(1.0,0.1), 1.0*N(0.0,1.0)]
        Initial probabilities: [0.7000, 0.3000]
    """
    # Initial distribution first: its length defines the number of states N.
    self.pi = util.initial_probs_to_TimeSeries(pi, normalize)
    self.N = len(self.pi)
    self.A = util.state_matrix_to_TimeSeries(A, self.N, normalize)
    if len(self.A) != self.N*self.N:
        raise ValueError, "number of entries of transition matrix A must be the square of the number of entries of pi"

    # Coerce each entry of B into a GaussianMixtureDistribution; raw
    # (weight, (mean, std)) tuples are flattened into (weight, mean, std).
    mixtures = []
    for b in B:
        if isinstance(b, GaussianMixtureDistribution):
            mixtures.append(b)
        else:
            mixtures.append(GaussianMixtureDistribution([flatten(x) for x in b]))
    self.mixture = mixtures
    if self.N != len(self.mixture):
        raise ValueError, "number of GaussianMixtures must be the same as number of entries of pi"
1098
1099
def __repr__(self):
    """
    Return string representation of this Gaussian mixture HMM.

    EXAMPLES::

        sage: hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3]).__repr__()
        'Gaussian Mixture Hidden Markov Model with 2 States\nTransition matrix:\n[0.9 0.1]\n[0.4 0.6]\nEmission parameters:\n[0.4*N(0.0,1.0) + 0.6*N(1.0,0.1), 1.0*N(0.0,1.0)]\nInitial probabilities: [0.7000, 0.3000]'
    """
    # Assemble the four sections and join them; identical text to
    # building the string by repeated concatenation.
    parts = [
        "Gaussian Mixture Hidden Markov Model with %s States"%self.N,
        'Transition matrix:\n%s'%self.transition_matrix(),
        'Emission parameters:\n%s'%self.emission_parameters(),
        'Initial probabilities: %s'%self.initial_probabilities(),
    ]
    return '\n'.join(parts)
1113
1114
def __reduce__(self):
    """
    Used in pickling.

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(.4,(0,1)), (.6,(1,0.1))]], [1])
        sage: loads(dumps(m)) == m
        True
    """
    # The unpickler restores the model directly from these raw attributes.
    data = (self.A, self.B, self.pi, self.mixture)
    return unpickle_gaussian_mixture_hmm_v1, data
1126
1127
1128
def __cmp__(self, other):
    """
    Compare self and other, which must both be GaussianMixtureHiddenMarkovModel's.

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(.4,(0,1)), (.6,(1,0.1))]], [1])
        sage: n = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(.5,(0,1)), (.5,(1,0.1))]], [1])
        sage: m < n
        True
        sage: m == m
        True
        sage: n > m
        True
        sage: n < m
        False
    """
    if not isinstance(other, GaussianMixtureHiddenMarkovModel):
        raise ValueError
    # Compare the defining data (A, B, pi, mixture) lexicographically,
    # reusing the tuples that __reduce__ produces for pickling.
    self_data = self.__reduce__()[1]
    other_data = other.__reduce__()[1]
    return cmp(self_data, other_data)
1148
1149
def __getitem__(self, Py_ssize_t i):
    """
    Return the Gaussian mixture distribution associated to the
    i-th state.

    INPUT:

    - i -- integer

    OUTPUT:

    - a Gaussian mixture distribution object

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        sage: m[0]
        0.4*N(0.0,1.0) + 0.6*N(1.0,0.1)
        sage: m[1]
        1.0*N(0.0,1.0)

    Negative indexing works::

        sage: m[-1]
        1.0*N(0.0,1.0)

    Bounds are checked::

        sage: m[2]
        Traceback (most recent call last):
        ...
        IndexError: index out of range
        sage: m[-3]
        Traceback (most recent call last):
        ...
        IndexError: index out of range
    """
    # Work on a copy so the caller's index is never mutated; negative
    # indices wrap once, Python-style.
    cdef Py_ssize_t k = i
    if k < 0:
        k += self.N
    if not (0 <= k < self.N):
        raise IndexError('index out of range')
    return self.mixture[k]
1191
1192
def emission_parameters(self):
    """
    Return a list of all the emission distributions.

    OUTPUT:

    - list of Gaussian mixtures

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        sage: m.emission_parameters()
        [0.4*N(0.0,1.0) + 0.6*N(1.0,0.1), 1.0*N(0.0,1.0)]
    """
    # Return a fresh list so callers cannot mutate our internal state.
    return [dist for dist in self.mixture]
1207
1208
cdef double random_sample(self, int state, randstate rstate):
    """
    Return one random draw from the Gaussian mixture attached to the
    given state.

    Internal use only: ``state`` is not bounds-checked and no other
    error checking is done, so calling this improperly can lead to
    segfaults.

    INPUT:

    - state -- integer
    - rstate -- randstate instance

    OUTPUT:

    - double
    """
    cdef GaussianMixtureDistribution dist = self.mixture[state]
    return dist._sample(rstate)
1228
1229
cdef double probability_of(self, int state, double observation):
    """
    Return the emission density b_j(o) of the given observation o
    (=observation) in the given state j (=state).

    This is a continuous probability density, so the returned value p
    is such that the probability of an observation in the interval
    [o, o+d] is approximately p*d.

    INPUT:

    - state -- integer
    - observation -- double

    OUTPUT:

    - double
    """
    cdef GaussianMixtureDistribution dist = self.mixture[state]
    return dist.prob(observation)
1249
1250
cdef TimeSeries _baum_welch_mixed_gamma(self, TimeSeries alpha, TimeSeries beta,
                TimeSeries obs, int j):
    """
    Let gamma_t(j,m) be the m-component (in the mixture) of the
    probability of being in state j at time t, given the
    observation sequence. This function outputs a TimeSeries v
    such that v[m*T + t] gives gamma_t(j, m) where T is the number
    of time steps.

    INPUT:

    - alpha -- TimeSeries; scaled forward table, flattened as t*N + state
    - beta -- TimeSeries; scaled backward table, same t*N + state layout
    - obs -- TimeSeries
    - j -- int

    OUTPUT:

    - TimeSeries
    """
    cdef int i, k, m, N = self.N
    # alpha stores one value per (time, state) pair, so T = len(alpha)/N.
    cdef Py_ssize_t t, T = alpha._length//N

    cdef double numer, alpha_minus, P, s, prob
    cdef GaussianMixtureDistribution G = self.mixture[j]
    cdef int M = len(G)  # number of components in state j's mixture
    # Output is flattened as m*T + t (component-major).
    cdef TimeSeries mixed_gamma = TimeSeries(T*M)

    for t in range(T):
        # Total emission density of obs[t] in state j (sum over components).
        prob = self.probability_of(j, obs._values[t])
        if prob == 0:
            # If the probability of observing obs[t] in state j is 0, then
            # all of the m-mixture components *have* to automatically be 0,
            # since prob is the sum of those and they are all nonnegative.
            for m in range(M):
                mixed_gamma._values[m*T + t] = 0
        else:
            # Compute the denominator we used when scaling gamma.
            # The key thing is that this is consistent between
            # gamma and mixed_gamma.
            P = 0
            for k in range(N):
                P += alpha._values[t*N+k]*beta._values[t*N+k]

            # Divide out the total probability, so we can multiply back in
            # the m-components of the probability.
            alpha_minus = alpha._values[t*N + j] / prob
            for m in range(M):
                # Weight by the density of the single component m only.
                numer = alpha_minus * G.prob_m(obs._values[t], m) * beta._values[t*N + j]
                mixed_gamma._values[m*T + t] = numer / P

    return mixed_gamma
1302
1303
def baum_welch(self, obs, int max_iter=1000, double log_likelihood_cutoff=1e-12,
               double min_sd=0.01, bint fix_emissions=False):
    """
    Given an observation sequence obs, improve this HMM using the
    Baum-Welch algorithm to increase the probability of observing obs.

    INPUT:

    - obs -- a time series of emissions
    - max_iter -- integer (default: 1000) maximum number
      of Baum-Welch steps to take
    - log_likelihood_cutoff -- positive float (default: 1e-12);
      the minimal improvement in likelihood with respect to
      the last iteration required to continue. Relative value
      to log likelihood.
    - min_sd -- positive float (default: 0.01); when
      reestimating, the standard deviation of emissions is not
      allowed to be less than min_sd.
    - fix_emissions -- bool (default: False); if True, do not
      change emissions when updating

    OUTPUT:

    - changes the model in place, and returns the log
      likelihood and number of iterations.

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        sage: set_random_seed(0); v = m.sample(10); v
        [0.3576, -0.9365, 0.9449, -0.6957, 1.0217, 0.9644, 0.9987, -0.5950, -1.0219, 0.6477]
        sage: m.log_likelihood(v)
        -8.31408655939536...
        sage: m.baum_welch(v)
        (2.18905068682..., 15)
        sage: m.log_likelihood(v)
        2.18905068682...
        sage: m
        Gaussian Mixture Hidden Markov Model with 2 States
        Transition matrix:
        [ 0.874636333977 0.125363666023]
        [ 1.0 1.45168520229e-40]
        Emission parameters:
        [0.500161629343*N(-0.812298726239,0.173329026744) + 0.499838370657*N(0.982433690378,0.029719932009), 1.0*N(0.503260056832,0.145881515324)]
        Initial probabilities: [0.0000, 1.0000]

    We illustrate bounding the standard deviation below. Note that above we had
    different emission parameters when the min_sd was the default of 0.01::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        sage: m.baum_welch(v, min_sd=1)
        (-12.617885761692..., 1000)
        sage: m.emission_parameters()
        [0.503545634447*N(0.200166509595,1.0) + 0.496454365553*N(0.200166509595,1.0), 1.0*N(0.0543433426535,1.0)]

    We illustrate fixing all emissions::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[.9,.1],[.4,.6]], [[(.4,(0,1)), (.6,(1,0.1))],[(1,(0,1))]], [.7,.3])
        sage: set_random_seed(0); v = m.sample(10)
        sage: m.baum_welch(v, fix_emissions=True)
        (-7.58656858997..., 36)
        sage: m.emission_parameters()
        [0.4*N(0.0,1.0) + 0.6*N(1.0,0.1), 1.0*N(0.0,1.0)]
    """
    if not isinstance(obs, TimeSeries):
        obs = TimeSeries(obs)
    cdef TimeSeries _obs = obs
    cdef TimeSeries alpha, beta, scale, gamma, mixed_gamma, mixed_gamma_m, xi
    cdef double log_probability, log_probability0, log_probability_prev, delta
    cdef int i, j, k, m, N, n_iterations
    cdef Py_ssize_t t, T
    cdef double denominator_A, numerator_A, denominator_B, numerator_mean, numerator_std, \
        numerator_c, c, mu, std, numer, denom, new_mu, new_std, new_c, s
    cdef GaussianMixtureDistribution G

    # Initialization: compute the scaled forward/backward tables and
    # the E-step quantities gamma (state occupancy) and xi (transition
    # occupancy) for the current parameters.
    alpha, scale, log_probability0 = self._forward_scale_all(_obs)
    # A non-finite log likelihood means the model cannot be improved
    # from here; bail out without touching the parameters.
    if not isfinite(log_probability0):
        return (0.0, 0)
    log_probability = log_probability0
    beta = self._backward_scale_all(_obs, scale)
    gamma = self._baum_welch_gamma(alpha, beta)
    xi = self._baum_welch_xi(alpha, beta, _obs)
    log_probability_prev = log_probability
    N = self.N
    n_iterations = 0
    T = len(_obs)

    # Re-estimation (the M-step, repeated until convergence or max_iter)
    while True:

        # Reestimate frequency of state i in time t=0
        for i in range(N):
            if not isfinite(gamma._values[0*N+i]):
                # Before raising an error, leave self in a valid state.
                util.normalize_probability_TimeSeries(self.pi, 0, self.pi._length)
                raise RuntimeError, "impossible to compute gamma during reestimation"
            self.pi._values[i] = gamma._values[0*N+i]

        # Update the probabilities pi to define a valid discrete distribution
        util.normalize_probability_TimeSeries(self.pi, 0, self.pi._length)

        # Reestimate transition matrix and emission probabilities in
        # each state.
        for i in range(N):
            # Reestimate the state transition matrix.  Sums run over
            # t in [0, T-2] since xi counts transitions out of time t.
            denominator_A = 0.0
            for t in range(T-1):
                denominator_A += gamma._values[t*N+i]
            if not isnormal(denominator_A):
                raise RuntimeError, "unable to re-estimate pi (1)"
            for j in range(N):
                numerator_A = 0.0
                for t in range(T-1):
                    # xi is flattened as t*N*N + i*N + j.
                    numerator_A += xi._values[t*N*N+i*N+j]
                self.A._values[i*N+j] = numerator_A / denominator_A

            # Rescale the i-th row of the transition matrix to be
            # a valid stochastic matrix:
            util.normalize_probability_TimeSeries(self.A, i*N, (i+1)*N)

            ########################################################################
            # Re-estimate the emission probabilities
            ########################################################################
            G = self.mixture[i]
            if not fix_emissions and not G.is_fixed():
                # Per-component occupancy, flattened as m*T + t.
                mixed_gamma = self._baum_welch_mixed_gamma(alpha, beta, _obs, i)
                new_G = []
                for m in range(len(G)):
                    # Individually fixed components are carried over unchanged.
                    if G.fixed._values[m]:
                        new_G.append(G[m])
                        continue

                    # Compute re-estimated mu_{j,m}: occupancy-weighted
                    # mean of the observations.
                    numer = 0
                    denom = 0
                    for t in range(T):
                        numer += mixed_gamma._values[m*T + t] * _obs._values[t]
                        denom += mixed_gamma._values[m*T + t]
                    new_mu = numer / denom

                    # Compute re-estimated standard deviation, using the
                    # *old* component mean G[m][1] in the residuals.
                    numer = 0
                    mu = G[m][1]
                    for t in range(T):
                        numer += mixed_gamma._values[m*T + t] * \
                            (_obs._values[t] - mu)*(_obs._values[t] - mu)

                    new_std = sqrt(numer / denom)
                    # Clamp to min_sd so a component never degenerates
                    # to a point mass.
                    if new_std < min_sd:
                        new_std = min_sd

                    # Compute re-estimated weighting coefficient:
                    # component occupancy over total state occupancy.
                    new_c = denom
                    s = 0
                    for t in range(T):
                        s += gamma._values[t*N + i]
                    new_c /= s

                    new_G.append((new_c,new_mu,new_std))

                self.mixture[i] = GaussianMixtureDistribution(new_G)

        n_iterations += 1
        if n_iterations >= max_iter: break

        ########################################################################
        # Initialization for next iteration
        ########################################################################
        alpha, scale, log_probability0 = self._forward_scale_all(_obs)
        # Stop (keeping the last finite likelihood) if the update drove
        # the likelihood non-finite.
        if not isfinite(log_probability0): break
        log_probability = log_probability0
        beta = self._backward_scale_all(_obs, scale)
        gamma = self._baum_welch_gamma(alpha, beta)
        xi = self._baum_welch_xi(alpha, beta, _obs)

        # Compute the difference between the log probability of
        # two iterations.
        delta = log_probability - log_probability_prev
        log_probability_prev = log_probability

        # If the log probability does not improve by more than
        # delta, then terminate
        if delta >= 0 and delta <= log_likelihood_cutoff:
            break

    return log_probability, n_iterations
1490
1491
1492
##################################################
1493
# For Unpickling
1494
##################################################
1495
1496
# We keep the _v0 function for backwards compatibility.
1497
def unpickle_gaussian_hmm_v0(A, B, pi, name):
    """
    Reconstruct a GaussianHiddenMarkovModel from an old (v0) pickle.

    The ``name`` argument was stored by the old format and is ignored
    by the current model class.

    EXAMPLES::

        sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
        sage: sage.stats.hmm.chmm.unpickle_gaussian_hmm_v0(m.transition_matrix(), m.emission_parameters(), m.initial_probabilities(), 'test')
        Gaussian Hidden Markov Model with 1 States
        Transition matrix:
        [1.0]
        Emission parameters:
        [(0.0, 1.0)]
        Initial probabilities: [1.0000]
    """
    model = GaussianHiddenMarkovModel(A, B, pi)
    return model
1511
1512
1513
def unpickle_gaussian_hmm_v1(A, B, pi, prob, n_out):
    """
    Reconstruct a GaussianHiddenMarkovModel from its pickled attributes.

    EXAMPLES::

        sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1])
        sage: loads(dumps(m)) == m # indirect test
        True
    """
    # Allocate without running __init__, then restore the raw attributes
    # exactly as they were pickled.
    cdef GaussianHiddenMarkovModel m = PY_NEW(GaussianHiddenMarkovModel)
    m.pi = pi
    m.A = A
    m.B = B
    m.prob = prob
    m.n_out = n_out
    return m
1528
1529
def unpickle_gaussian_mixture_hmm_v1(A, B, pi, mixture):
    """
    Reconstruct a GaussianMixtureHiddenMarkovModel from its pickled attributes.

    EXAMPLES::

        sage: m = hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(.4,(0,1)), (.6,(1,0.1))]], [1])
        sage: loads(dumps(m)) == m # indirect test
        True
    """
    # Allocate without running __init__, then restore the raw attributes
    # exactly as they were pickled.
    cdef GaussianMixtureHiddenMarkovModel m = PY_NEW(GaussianMixtureHiddenMarkovModel)
    m.pi = pi
    m.A = A
    m.B = B
    m.mixture = mixture
    return m
1543
1544
1545