CoCalc -- util.pyx

GitHub Repository: sagemath/sagelib
Path: blob/master/sage/stats/hmm/util.pyx
⁴⁰⁷⁹ views
1
"""
2
Hidden Markov Models -- Utility functions
3

4
AUTHOR:
5

6
   - William Stein, 2010-03
7
"""
8

9
#############################################################################
10
#       Copyright (C) 2010 William Stein <[email protected]>
11
#  Distributed under the terms of the GNU General Public License (GPL) v2+.
12
#  The full text of the GPL is available at:
13
#                  http://www.gnu.org/licenses/
14
#############################################################################
15
    
16
include "../../ext/stdsage.pxi"
17

18
from sage.matrix.all import is_Matrix
19
from sage.misc.flatten  import flatten
20

21
cdef class HMM_Util:
22
    """
23
    A class used in order to share cdef's methods between different files.
24
    """
25
    cpdef normalize_probability_TimeSeries(self, TimeSeries T, Py_ssize_t i, Py_ssize_t j):
26
        """
27
        This function is used internally by the Hidden Markov Models code.
28

29
        Replace entries of T[i:j] in place so that they are all
30
        nonnegative and sum to 1.  Negative entries are replaced by 0 and
31
        T[i:j] is then rescaled to ensure that the sum of the entries in
32
        each row is equal to 1.  If all entries are 0, replace them
33
        by 1/(j-i).
34

35
        INPUT:
36

37
            - T -- a TimeSeries
38
            - i -- nonnegative integer
39
            - j -- nonnegative integer
40

41
        OUTPUT:
42

43
            - T is modified
44

45
        EXAMPLES::
46

47
            sage: import sage.stats.hmm.util
48
            sage: T = stats.TimeSeries([.1, .3, .7, .5])
49
            sage: u = sage.stats.hmm.util.HMM_Util()
50
            sage: u.normalize_probability_TimeSeries(T,0,3)
51
            sage: T
52
            [0.0909, 0.2727, 0.6364, 0.5000]
53
            sage: u.normalize_probability_TimeSeries(T,0,4)
54
            sage: T
55
            [0.0606, 0.1818, 0.4242, 0.3333]
56
            sage: abs(T.sum()-1) < 1e-8    # might not exactly equal 1 due to rounding
57
            True
58
        """
59
        # One single bounds check only
60
        if i < 0 or j < 0 or i > T._length or j > T._length:
61
            raise IndexError
62

63
        if j-i <= 0:
64
            # Nothing to do
65
            return
66

67
        cdef Py_ssize_t k
68

69
        # Replace negative entries by 0, summing entries
70
        cdef double s = 0, t
71
        for k in range(i,j):
72
            if T._values[k] < 0:
73
                T._values[k] = 0
74
            else:
75
                s += T._values[k]
76

77
        if s == 0:
78
            # If sum is 0, make all entries 1/(j-i).
79
            t = 1.0/(j-i)
80
            for k in range(i,j):
81
                T._values[k] = t
82
        else:
83
            # Normalie so sum is 1.
84
            for k in range(i,j):
85
                T._values[k] /= s
86

87

88

89
    cpdef TimeSeries initial_probs_to_TimeSeries(self, pi, bint normalize):
90
        """
91
        This function is used internally by the __init__ methods of
92
        various Hidden Markov Models.
93
        
94
        INPUT:
95

96
            - pi -- vector, list, or TimeSeries
97
            - normalize -- if True, replace negative entries by 0 and
98
              rescale to ensure that the sum of the entries in each row is
99
              equal to 1.  If the sum of the entries in a row is 0, replace them
100
              all by 1/N.
101

102
        OUTPUT:
103
            - a TimeSeries of length N
104

105
        EXAMPLES::
106

107
            sage: import sage.stats.hmm.util
108
            sage: u = sage.stats.hmm.util.HMM_Util()
109
            sage: u.initial_probs_to_TimeSeries([0.1,0.2,0.9], True)
110
            [0.0833, 0.1667, 0.7500]
111
            sage: u.initial_probs_to_TimeSeries([0.1,0.2,0.9], False)
112
            [0.1000, 0.2000, 0.9000]
113
        """
114
        cdef TimeSeries T
115
        if PY_TYPE_CHECK(pi, TimeSeries):
116
            T = pi
117
        else:
118
            if not isinstance(pi, list):
119
                pi = list(pi)
120
            T = TimeSeries(pi)
121
        if normalize:
122
            # Now normalize
123
            self.normalize_probability_TimeSeries(T, 0, T._length)
124
        return T
125

126

127
    cpdef TimeSeries state_matrix_to_TimeSeries(self, A, int N, bint normalize):
128
        """
129
        This function is used internally by the __init__ methods of
130
        Hidden Markov Models to make a transition matrix from A.
131
        
132

133
        INPUT:
134

135
            - A -- matrix, list, list of lists, or TimeSeries
136
            - N -- number of states
137
            - normalize -- if True, replace negative entries by 0 and
138
              rescale to ensure that the sum of the entries in each row is
139
              equal to 1.  If the sum of the entries in a row is 0, replace them
140
              all by 1/N.
141

142
        OUTPUT:
143

144
            - a TimeSeries
145

146
        EXAMPLES::
147

148
            sage: import sage.stats.hmm.util
149
            sage: u = sage.stats.hmm.util.HMM_Util()
150
            sage: u.state_matrix_to_TimeSeries([[.1,.7],[3/7,4/7]], 2, True)
151
            [0.1250, 0.8750, 0.4286, 0.5714]
152
            sage: u.state_matrix_to_TimeSeries([[.1,.7],[3/7,4/7]], 2, False)
153
            [0.1000, 0.7000, 0.4286, 0.5714]
154
        """
155
        cdef TimeSeries T
156
        if PY_TYPE_CHECK(A, TimeSeries):
157
            T = A
158
        elif is_Matrix(A):
159
            T = TimeSeries(A.list())
160
        elif isinstance(A, list):
161
            T = TimeSeries(flatten(A))
162
        else:
163
            T = TimeSeries(A)
164
        cdef Py_ssize_t i
165
        if normalize:
166
            # Set to 0 negative rows and make sure sum of entries in each
167
            # row is 1.
168
            if len(T) != N*N:
169
                raise ValueError, "number of entries of transition matrix A must be the square of the number of entries of pi"
170
            for i in range(N):
171
                self.normalize_probability_TimeSeries(T, i*N, (i+1)*N)
172
        return T
173

174

175
Product

Resources

Company