Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sagelib
Path: blob/master/sage/monoids/string_ops.py
4045 views
1
#*****************************************************************************
2
# Copyright (C) 2007 David Kohel <[email protected]>
3
#
4
# Distributed under the terms of the GNU General Public License (GPL)
5
#
6
# http://www.gnu.org/licenses/
7
#*****************************************************************************
8
9
from sage.rings.all import RealField
10
from sage.probability.random_variable import DiscreteProbabilitySpace
11
from string_monoid_element import StringMonoidElement
12
13
def strip_encoding(S):
14
"""
15
The upper case string of S stripped of all non-alphabetic characters.
16
EXAMPLES:
17
sage: S = "The cat in the hat."
18
sage: strip_encoding(S)
19
'THECATINTHEHAT'
20
"""
21
if not isinstance(S,str):
22
raise TypeError, "Argument S (= %s) must be a string."
23
X = ''
24
for i in range(len(S)):
25
C = S[i]
26
if C.isalpha():
27
X += S[i].upper()
28
return X
29
30
def frequency_distribution(S, n=1, field=None):
31
"""
32
The probability space of frequencies of n-character substrings of S.
33
"""
34
if isinstance(S,tuple):
35
S = list(S)
36
elif isinstance(S,(str,StringMonoidElement)):
37
S = [ S[i:i+n] for i in range(len(S)-n+1) ]
38
if field is None:
39
field = RealField()
40
if isinstance(S,list):
41
P = {}
42
N = len(S)
43
eps = field(1)/N
44
for i in range(N):
45
c = S[i]
46
if P.has_key(c):
47
P[c] += eps
48
else:
49
P[c] = eps
50
return DiscreteProbabilitySpace(S,P,field)
51
raise TypeError, "Argument S (= %s) must be a string, list, or tuple."
52
53
def coincidence_index(S,n=1):
54
"""
55
The coincidence index of the string S.
56
EXAMPLES:
57
sage: S = strip_encoding("The cat in the hat.")
58
sage: coincidence_index(S)
59
0.120879120879121
60
"""
61
if not isinstance(S,str):
62
try:
63
S.coincidence_index(n)
64
except AttributeError:
65
raise TypeError, "Argument S (= %s) must be a string."
66
S = strip_encoding(S)
67
N = len(S)-n+1
68
X = {}
69
for i in range(N):
70
c = S[i:i+n]
71
if X.has_key(c):
72
X[c] += 1
73
else:
74
X[c] = 1
75
RR = RealField()
76
return RR(sum([ m*(m-1) for m in X.values() ]))/RR(N*(N-1))
77
78
def coincidence_discriminant(S,n=2):
79
"""
80
Input: A tuple of strings, e.g. produced as decimation of transposition
81
ciphertext, or a sample plaintext.
82
Output: A measure of the difference of probability of association of
83
character pairs, relative to their independent one-character probabilities.
84
85
EXAMPLES:
86
sage: S = strip_encoding("The cat in the hat.")
87
sage: coincidence_discriminant([ S[i:i+2] for i in range(len(S)-1) ])
88
0.0827001855677322
89
"""
90
if not isinstance(S,(list,tuple)):
91
raise TypeError, "Argument S (= %s) must be a list or tuple" % S
92
if n != 2:
93
raise ValueError, "Argument n (= %s) is only implemented for n = 2" % n
94
truth = True
95
for bool in ( isinstance(c,(str,StringMonoidElement)) for c in S ):
96
truth = truth and bool
97
if not truth:
98
raise TypeError, "Argument S (= %s) must be a list of strings."
99
for bool in ( len(c) == n for c in S ):
100
truth = truth and bool
101
if not truth:
102
raise ValueError, "Argument S (= %s) must be a list of strings of length 2" % S
103
X1 = [ frequency_distribution([ s[i] for s in S]) for i in range(2) ]
104
XX = frequency_distribution(S)
105
if isinstance(S[0],StringMonoidElement):
106
M = S[0].parent()
107
n = M.ngens()
108
return sum([ (XX(M([i,j]))-X1[0](M([i]))*X1[1](M([j])))**2 for i in range(n) for j in range(n) ])
109
AZ = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
110
return sum([ (XX(AZ[i]+AZ[j])-X1[0](AZ[i])*X1[1](AZ[j]))**2 for i in range(26) for j in range(26) ])
111
112