Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sagelib
Path: blob/master/sage/stats/hmm/util.pyx
4079 views
1
"""
2
Hidden Markov Models -- Utility functions
3
4
AUTHOR:
5
6
- William Stein, 2010-03
7
"""
8
9
#############################################################################
10
# Copyright (C) 2010 William Stein <[email protected]>
11
# Distributed under the terms of the GNU General Public License (GPL) v2+.
12
# The full text of the GPL is available at:
13
# http://www.gnu.org/licenses/
14
#############################################################################
15
16
include "../../ext/stdsage.pxi"
17
18
from sage.matrix.all import is_Matrix
19
from sage.misc.flatten import flatten
20
21
cdef class HMM_Util:
22
"""
23
A class used in order to share cdef's methods between different files.
24
"""
25
cpdef normalize_probability_TimeSeries(self, TimeSeries T, Py_ssize_t i, Py_ssize_t j):
26
"""
27
This function is used internally by the Hidden Markov Models code.
28
29
Replace entries of T[i:j] in place so that they are all
30
nonnegative and sum to 1. Negative entries are replaced by 0 and
31
T[i:j] is then rescaled to ensure that the sum of the entries in
32
each row is equal to 1. If all entries are 0, replace them
33
by 1/(j-i).
34
35
INPUT:
36
37
- T -- a TimeSeries
38
- i -- nonnegative integer
39
- j -- nonnegative integer
40
41
OUTPUT:
42
43
- T is modified
44
45
EXAMPLES::
46
47
sage: import sage.stats.hmm.util
48
sage: T = stats.TimeSeries([.1, .3, .7, .5])
49
sage: u = sage.stats.hmm.util.HMM_Util()
50
sage: u.normalize_probability_TimeSeries(T,0,3)
51
sage: T
52
[0.0909, 0.2727, 0.6364, 0.5000]
53
sage: u.normalize_probability_TimeSeries(T,0,4)
54
sage: T
55
[0.0606, 0.1818, 0.4242, 0.3333]
56
sage: abs(T.sum()-1) < 1e-8 # might not exactly equal 1 due to rounding
57
True
58
"""
59
# One single bounds check only
60
if i < 0 or j < 0 or i > T._length or j > T._length:
61
raise IndexError
62
63
if j-i <= 0:
64
# Nothing to do
65
return
66
67
cdef Py_ssize_t k
68
69
# Replace negative entries by 0, summing entries
70
cdef double s = 0, t
71
for k in range(i,j):
72
if T._values[k] < 0:
73
T._values[k] = 0
74
else:
75
s += T._values[k]
76
77
if s == 0:
78
# If sum is 0, make all entries 1/(j-i).
79
t = 1.0/(j-i)
80
for k in range(i,j):
81
T._values[k] = t
82
else:
83
# Normalie so sum is 1.
84
for k in range(i,j):
85
T._values[k] /= s
86
87
88
89
cpdef TimeSeries initial_probs_to_TimeSeries(self, pi, bint normalize):
90
"""
91
This function is used internally by the __init__ methods of
92
various Hidden Markov Models.
93
94
INPUT:
95
96
- pi -- vector, list, or TimeSeries
97
- normalize -- if True, replace negative entries by 0 and
98
rescale to ensure that the sum of the entries in each row is
99
equal to 1. If the sum of the entries in a row is 0, replace them
100
all by 1/N.
101
102
OUTPUT:
103
- a TimeSeries of length N
104
105
EXAMPLES::
106
107
sage: import sage.stats.hmm.util
108
sage: u = sage.stats.hmm.util.HMM_Util()
109
sage: u.initial_probs_to_TimeSeries([0.1,0.2,0.9], True)
110
[0.0833, 0.1667, 0.7500]
111
sage: u.initial_probs_to_TimeSeries([0.1,0.2,0.9], False)
112
[0.1000, 0.2000, 0.9000]
113
"""
114
cdef TimeSeries T
115
if PY_TYPE_CHECK(pi, TimeSeries):
116
T = pi
117
else:
118
if not isinstance(pi, list):
119
pi = list(pi)
120
T = TimeSeries(pi)
121
if normalize:
122
# Now normalize
123
self.normalize_probability_TimeSeries(T, 0, T._length)
124
return T
125
126
127
cpdef TimeSeries state_matrix_to_TimeSeries(self, A, int N, bint normalize):
128
"""
129
This function is used internally by the __init__ methods of
130
Hidden Markov Models to make a transition matrix from A.
131
132
133
INPUT:
134
135
- A -- matrix, list, list of lists, or TimeSeries
136
- N -- number of states
137
- normalize -- if True, replace negative entries by 0 and
138
rescale to ensure that the sum of the entries in each row is
139
equal to 1. If the sum of the entries in a row is 0, replace them
140
all by 1/N.
141
142
OUTPUT:
143
144
- a TimeSeries
145
146
EXAMPLES::
147
148
sage: import sage.stats.hmm.util
149
sage: u = sage.stats.hmm.util.HMM_Util()
150
sage: u.state_matrix_to_TimeSeries([[.1,.7],[3/7,4/7]], 2, True)
151
[0.1250, 0.8750, 0.4286, 0.5714]
152
sage: u.state_matrix_to_TimeSeries([[.1,.7],[3/7,4/7]], 2, False)
153
[0.1000, 0.7000, 0.4286, 0.5714]
154
"""
155
cdef TimeSeries T
156
if PY_TYPE_CHECK(A, TimeSeries):
157
T = A
158
elif is_Matrix(A):
159
T = TimeSeries(A.list())
160
elif isinstance(A, list):
161
T = TimeSeries(flatten(A))
162
else:
163
T = TimeSeries(A)
164
cdef Py_ssize_t i
165
if normalize:
166
# Set to 0 negative rows and make sure sum of entries in each
167
# row is 1.
168
if len(T) != N*N:
169
raise ValueError, "number of entries of transition matrix A must be the square of the number of entries of pi"
170
for i in range(N):
171
self.normalize_probability_TimeSeries(T, i*N, (i+1)*N)
172
return T
173
174
175