Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sagesmc
Path: blob/master/src/sage/monoids/string_monoid_element.py
8815 views
1
"""
2
String Monoid Elements
3
4
AUTHORS:
5
6
- David Kohel <[email protected]>, 2007-01
7
8
Elements of free string monoids, internal representation subject to change.
9
10
These are special classes of free monoid elements with distinct printing.
11
12
The internal representation of elements does not use the exponential
13
compression of FreeMonoid elements (a feature), and could be packed into words.
14
"""
15
16
#*****************************************************************************
17
# Copyright (C) 2007 David Kohel <[email protected]>
18
#
19
# Distributed under the terms of the GNU General Public License (GPL)
20
#
21
# http://www.gnu.org/licenses/
22
#*****************************************************************************
23
24
# import operator
25
from sage.rings.integer import Integer
26
from sage.rings.all import RealField
27
# from sage.structure.element import MonoidElement
28
from sage.probability.random_variable import DiscreteProbabilitySpace
29
from free_monoid_element import FreeMonoidElement
30
import string_monoid
31
32
def is_StringMonoidElement(x):
33
r"""
34
"""
35
return isinstance(x, StringMonoidElement)
36
37
def is_AlphabeticStringMonoidElement(x):
38
r"""
39
"""
40
return isinstance(x, StringMonoidElement) and \
41
isinstance(x.parent(), string_monoid.AlphabeticStringMonoid)
42
43
def is_BinaryStringMonoidElement(x):
44
r"""
45
"""
46
return isinstance(x, StringMonoidElement) and \
47
isinstance(x.parent(), string_monoid.BinaryStringMonoid)
48
49
def is_OctalStringMonoidElement(x):
50
r"""
51
"""
52
return isinstance(x, StringMonoidElement) and \
53
isinstance(x.parent(), string_monoid.OctalStringMonoid)
54
55
def is_HexadecimalStringMonoidElement(x):
56
r"""
57
"""
58
return isinstance(x, StringMonoidElement) and \
59
isinstance(x.parent(), string_monoid.HexadecimalStringMonoid)
60
61
def is_Radix64StringMonoidElement(x):
62
r"""
63
"""
64
return isinstance(x, StringMonoidElement) and \
65
isinstance(x.parent(), string_monoid.Radix64StringMonoid)
66
67
68
class StringMonoidElement(FreeMonoidElement):
69
"""
70
Element of a free string monoid.
71
"""
72
73
def __init__(self, S, x, check=True):
74
"""
75
Create the element ``x`` of the StringMonoid ``S``.
76
77
This should typically be called by a StringMonoid.
78
"""
79
FreeMonoidElement.__init__(self, S, [])
80
if isinstance(x, list):
81
if check:
82
for b in x:
83
if not isinstance(b, (int, long, Integer)):
84
raise TypeError(
85
"x (= %s) must be a list of integers." % x)
86
self._element_list = list(x) # make copy
87
elif isinstance(x, str):
88
alphabet = list(self.parent().alphabet())
89
self._element_list = []
90
for i in range(len(x)):
91
try:
92
b = alphabet.index(x[i])
93
except ValueError:
94
raise TypeError(
95
"Argument x (= %s) is not a valid string." % x)
96
self._element_list += [b]
97
else:
98
raise TypeError("Argument x (= %s) is of the wrong type." % x)
99
100
def __cmp__(left, right):
101
"""
102
Compare two free monoid elements with the same parents.
103
104
The ordering is the one on the underlying sorted list of
105
(monomial,coefficients) pairs.
106
107
EXAMPLES::
108
109
sage: S = BinaryStrings()
110
sage: (x,y) = S.gens()
111
sage: x * y < y * x
112
True
113
sage: S("01") < S("10")
114
True
115
"""
116
return cmp(left._element_list, right._element_list)
117
118
def _repr_(self):
119
"""
120
The self-representation of a string monoid element. Unlike Python
121
strings we print without the enclosing quotes.
122
"""
123
S = self.parent()
124
alphabet = S.alphabet()
125
s = ''
126
for b in self._element_list:
127
c = alphabet[b]
128
s += c
129
return s
130
131
def _latex_(self):
132
"""
133
Return latex representation of self.
134
135
EXAMPLES::
136
137
sage: S = BinaryStrings()
138
sage: s = S('101111000')
139
sage: latex(s)
140
101111000
141
"""
142
return self._repr_()
143
144
def __mul__(self, y):
145
"""
146
Multiply 2 free string monoid elements.
147
148
EXAMPLES::
149
150
sage: S = BinaryStrings()
151
sage: (x,y) = S.gens()
152
sage: x*y
153
01
154
"""
155
if not isinstance(y, StringMonoidElement):
156
raise TypeError("Argument y (= %s) is of wrong type." % y)
157
S = self.parent()
158
x_elt = self._element_list
159
y_elt = y._element_list
160
z = S('')
161
z._element_list = x_elt + y_elt
162
return z
163
164
def __pow__(self, n):
165
"""
166
Return the `n`-th power of the string element.
167
168
EXAMPLES::
169
170
sage: (x,y) = BinaryStrings().gens()
171
sage: x**3 * y**5 * x**7
172
000111110000000
173
sage: x**0
174
175
176
Note that raising to a negative power is *not* a constructor
177
for an element of the corresponding free group (yet).
178
179
::
180
181
sage: x**(-1)
182
Traceback (most recent call last):
183
...
184
IndexError: Argument n (= -1) must be non-negative.
185
"""
186
if not isinstance(n, (int, long, Integer)):
187
raise TypeError("Argument n (= %s) must be an integer." % n)
188
if n < 0:
189
raise IndexError("Argument n (= %s) must be non-negative." % n)
190
elif n == 0:
191
return self.parent()('')
192
elif n == 1:
193
return self
194
z = self.parent()('')
195
z._element_list = self._element_list * n
196
return z
197
198
def __len__(self):
199
"""
200
Return the number of products that occur in this monoid element.
201
For example, the length of the identity is 0, and the length
202
of the monoid `x_0^2x_1` is three.
203
204
EXAMPLES::
205
206
sage: S = BinaryStrings()
207
sage: z = S('')
208
sage: len(z)
209
0
210
sage: (x,y) = S.gens()
211
sage: len(x**2 * y**3)
212
5
213
"""
214
return len(self._element_list)
215
216
def __iter__(self):
217
"""
218
Return an iterator over this element as a string.
219
220
EXAMPLES::
221
222
sage: t = AlphabeticStrings()('SHRUBBERY')
223
sage: t.__iter__().next()
224
S
225
sage: list(t)
226
[S, H, R, U, B, B, E, R, Y]
227
"""
228
l = len(self._element_list)
229
i=0
230
while i < l:
231
yield self[i]
232
i+=1
233
raise StopIteration
234
235
def __getitem__(self, n):
236
"""
237
Return the n-th string character.
238
239
EXAMPLES::
240
241
sage: t = AlphabeticStrings()('SHRUBBERY')
242
sage: t[0]
243
S
244
sage: t[3]
245
U
246
sage: t[-1]
247
Y
248
"""
249
try:
250
c = self._element_list[n]
251
except Exception:
252
raise IndexError("Argument n (= %s) is not a valid index." % n)
253
if not isinstance(c, list):
254
c = [c]
255
return self.parent()(c)
256
257
def decoding(self, padic=False):
258
r"""
259
The byte string associated to a binary or hexadecimal string
260
monoid element.
261
262
EXAMPLES::
263
264
sage: S = HexadecimalStrings()
265
sage: s = S.encoding("A..Za..z"); s
266
412e2e5a612e2e7a
267
sage: s.decoding()
268
'A..Za..z'
269
sage: s = S.encoding("A..Za..z",padic=True); s
270
14e2e2a516e2e2a7
271
sage: s.decoding()
272
'\x14\xe2\xe2\xa5\x16\xe2\xe2\xa7'
273
sage: s.decoding(padic=True)
274
'A..Za..z'
275
sage: S = BinaryStrings()
276
sage: s = S.encoding("A..Za..z"); s
277
0100000100101110001011100101101001100001001011100010111001111010
278
sage: s.decoding()
279
'A..Za..z'
280
sage: s = S.encoding("A..Za..z",padic=True); s
281
1000001001110100011101000101101010000110011101000111010001011110
282
sage: s.decoding()
283
'\x82ttZ\x86tt^'
284
sage: s.decoding(padic=True)
285
'A..Za..z'
286
"""
287
S = self.parent()
288
if isinstance(S, string_monoid.AlphabeticStringMonoid):
289
return ''.join([ chr(65+i) for i in self._element_list ])
290
n = self.__len__()
291
if isinstance(S, string_monoid.HexadecimalStringMonoid):
292
if not n % 2 == 0:
293
"String %s must have even length to determine a byte character string." % str(self)
294
s = []
295
x = self._element_list
296
for k in range(n//2):
297
m = 2*k
298
if padic:
299
c = chr(x[m]+16*x[m+1])
300
else:
301
c = chr(16*x[m]+x[m+1])
302
s.append(c)
303
return ''.join(s)
304
if isinstance(S, string_monoid.BinaryStringMonoid):
305
if not n % 8 == 0:
306
"String %s must have even length 0 mod 8 to determine a byte character string." % str(self)
307
pows = [ 2**i for i in range(8) ]
308
s = []
309
x = self._element_list
310
for k in range(n//8):
311
m = 8*k
312
if padic:
313
c = chr(sum([ x[m+i]*pows[i] for i in range(8) ]))
314
else:
315
c = chr(sum([ x[m+7-i]*pows[i] for i in range(8) ]))
316
s.append(c)
317
return ''.join(s)
318
raise TypeError(
319
"Argument %s must be an alphabetic, binary, or hexadecimal string." % str(self))
320
321
def coincidence_index(self, prec=0):
322
"""
323
Returns the probability of two randomly chosen characters being equal.
324
"""
325
if prec == 0:
326
RR = RealField()
327
else:
328
RR = RealField(prec)
329
char_dict = {}
330
for i in self._element_list:
331
# if char_dict.has_key(i):
332
# The method .has_key() has been deprecated since Python 2.2. Use
333
# "k in Dict" instead of "Dict.has_key(k)".
334
if i in char_dict:
335
char_dict[i] += 1
336
else:
337
char_dict[i] = 1
338
nn = 0
339
ci_num = 0
340
for i in char_dict.keys():
341
ni = char_dict[i]
342
nn += ni
343
ci_num += ni*(ni-1)
344
ci_den = nn*(nn-1)
345
return RR(ci_num)/ci_den
346
347
def character_count(self):
348
r"""
349
Return the count of each unique character.
350
351
EXAMPLES:
352
353
Count the character frequency in an object comprised of capital
354
letters of the English alphabet::
355
356
sage: M = AlphabeticStrings().encoding("abcabf")
357
sage: sorted(M.character_count().items())
358
[(A, 2), (B, 2), (C, 1), (F, 1)]
359
360
In an object comprised of binary numbers::
361
362
sage: M = BinaryStrings().encoding("abcabf")
363
sage: sorted(M.character_count().items())
364
[(0, 28), (1, 20)]
365
366
In an object comprised of octal numbers::
367
368
sage: A = OctalStrings()
369
sage: M = A([1, 2, 3, 2, 5, 3])
370
sage: sorted(M.character_count().items())
371
[(1, 1), (2, 2), (3, 2), (5, 1)]
372
373
In an object comprised of hexadecimal numbers::
374
375
sage: A = HexadecimalStrings()
376
sage: M = A([1, 2, 4, 6, 2, 4, 15])
377
sage: sorted(M.character_count().items())
378
[(1, 1), (2, 2), (4, 2), (6, 1), (f, 1)]
379
380
In an object comprised of radix-64 characters::
381
382
sage: A = Radix64Strings()
383
sage: M = A([1, 2, 63, 45, 45, 10]); M
384
BC/ttK
385
sage: sorted(M.character_count().items())
386
[(B, 1), (C, 1), (K, 1), (t, 2), (/, 1)]
387
388
TESTS:
389
390
Empty strings return no counts of character frequency::
391
392
sage: M = AlphabeticStrings().encoding("")
393
sage: M.character_count()
394
{}
395
sage: M = BinaryStrings().encoding("")
396
sage: M.character_count()
397
{}
398
sage: A = OctalStrings()
399
sage: M = A([])
400
sage: M.character_count()
401
{}
402
sage: A = HexadecimalStrings()
403
sage: M = A([])
404
sage: M.character_count()
405
{}
406
sage: A = Radix64Strings()
407
sage: M = A([])
408
sage: M.character_count()
409
{}
410
"""
411
# the character frequency, i.e. the character count
412
CF = {}
413
for e in self:
414
if e in CF:
415
CF[e] += 1
416
else:
417
CF.setdefault(e, 1)
418
return CF
419
420
def frequency_distribution(self, length=1, prec=0):
421
"""
422
Returns the probability space of character frequencies. The output
423
of this method is different from that of the method
424
:func:`characteristic_frequency()
425
<sage.monoids.string_monoid.AlphabeticStringMonoid.characteristic_frequency>`.
426
One can think of the characteristic frequency probability of an
427
element in an alphabet `A` as the expected probability of that element
428
occurring. Let `S` be a string encoded using elements of `A`. The
429
frequency probability distribution corresponding to `S` provides us
430
with the frequency probability of each element of `A` as observed
431
occurring in `S`. Thus one distribution provides expected
432
probabilities, while the other provides observed probabilities.
433
434
INPUT:
435
436
- ``length`` -- (default ``1``) if ``length=1`` then consider the
437
probability space of monogram frequency, i.e. probability
438
distribution of single characters. If ``length=2`` then consider
439
the probability space of digram frequency, i.e. probability
440
distribution of pairs of characters. This method currently
441
supports the generation of probability spaces for monogram
442
frequency (``length=1``) and digram frequency (``length=2``).
443
444
- ``prec`` -- (default ``0``) a non-negative integer representing
445
the precision (in number of bits) of a floating-point number. The
446
default value ``prec=0`` means that we use 53 bits to represent
447
the mantissa of a floating-point number. For more information on
448
the precision of floating-point numbers, see the function
449
:func:`RealField() <sage.rings.real_mpfr.RealField>` or refer to the module
450
:mod:`real_mpfr <sage.rings.real_mpfr>`.
451
452
EXAMPLES:
453
454
Capital letters of the English alphabet::
455
456
sage: M = AlphabeticStrings().encoding("abcd")
457
sage: L = M.frequency_distribution().function()
458
sage: sorted(L.items())
459
<BLANKLINE>
460
[(A, 0.250000000000000),
461
(B, 0.250000000000000),
462
(C, 0.250000000000000),
463
(D, 0.250000000000000)]
464
465
The binary number system::
466
467
sage: M = BinaryStrings().encoding("abcd")
468
sage: L = M.frequency_distribution().function()
469
sage: sorted(L.items())
470
[(0, 0.593750000000000), (1, 0.406250000000000)]
471
472
The hexadecimal number system::
473
474
sage: M = HexadecimalStrings().encoding("abcd")
475
sage: L = M.frequency_distribution().function()
476
sage: sorted(L.items())
477
<BLANKLINE>
478
[(1, 0.125000000000000),
479
(2, 0.125000000000000),
480
(3, 0.125000000000000),
481
(4, 0.125000000000000),
482
(6, 0.500000000000000)]
483
484
Get the observed frequency probability distribution of digrams in the
485
string "ABCD". This string consists of the following digrams: "AB",
486
"BC", and "CD". Now find out the frequency probability of each of
487
these digrams as they occur in the string "ABCD"::
488
489
sage: M = AlphabeticStrings().encoding("abcd")
490
sage: D = M.frequency_distribution(length=2).function()
491
sage: sorted(D.items())
492
[(AB, 0.333333333333333), (BC, 0.333333333333333), (CD, 0.333333333333333)]
493
"""
494
if not length in (1, 2):
495
raise NotImplementedError("Not implemented")
496
if prec == 0:
497
RR = RealField()
498
else:
499
RR = RealField(prec)
500
S = self.parent()
501
n = S.ngens()
502
if length == 1:
503
Alph = S.gens()
504
else:
505
Alph = tuple([ x*y for x in S.gens() for y in S.gens() ])
506
X = {}
507
N = len(self)-length+1
508
eps = RR(Integer(1)/N)
509
for i in range(N):
510
c = self[i:i+length]
511
# if X.has_key(c):
512
# The method .has_key() has been deprecated since Python 2.2. Use
513
# "k in Dict" instead of "Dict.has_key(k)".
514
if c in X:
515
X[c] += eps
516
else:
517
X[c] = eps
518
# Return a dictionary of probability distribution. This should
519
# allow for easier parsing of the dictionary.
520
return DiscreteProbabilitySpace(Alph, X, RR)
521
522