Path: blob/master/sage/monoids/string_monoid_element.py
4045 views
"""1String Monoid Elements23AUTHORS:45- David Kohel <[email protected]>, 2007-0167Elements of free string monoids, internal representation subject to change.89These are special classes of free monoid elements with distinct printing.1011The internal representation of elements does not use the exponential12compression of FreeMonoid elements (a feature), and could be packed into words.13"""1415#*****************************************************************************16# Copyright (C) 2007 David Kohel <[email protected]>17#18# Distributed under the terms of the GNU General Public License (GPL)19#20# http://www.gnu.org/licenses/21#*****************************************************************************2223# import operator24from sage.rings.integer import Integer25from sage.rings.all import RealField26# from sage.structure.element import MonoidElement27from sage.probability.random_variable import DiscreteProbabilitySpace28from free_monoid_element import FreeMonoidElement29import string_monoid3031def is_StringMonoidElement(x):32r"""33"""34return isinstance(x, StringMonoidElement)3536def is_AlphabeticStringMonoidElement(x):37r"""38"""39return isinstance(x, StringMonoidElement) and \40isinstance(x.parent(), string_monoid.AlphabeticStringMonoid)4142def is_BinaryStringMonoidElement(x):43r"""44"""45return isinstance(x, StringMonoidElement) and \46isinstance(x.parent(), string_monoid.BinaryStringMonoid)4748def is_OctalStringMonoidElement(x):49r"""50"""51return isinstance(x, StringMonoidElement) and \52isinstance(x.parent(), string_monoid.OctalStringMonoid)5354def is_HexadecimalStringMonoidElement(x):55r"""56"""57return isinstance(x, StringMonoidElement) and \58isinstance(x.parent(), string_monoid.HexadecimalStringMonoid)5960def is_Radix64StringMonoidElement(x):61r"""62"""63return isinstance(x, StringMonoidElement) and \64isinstance(x.parent(), string_monoid.Radix64StringMonoid)656667class StringMonoidElement(FreeMonoidElement):68"""69Element of a free string monoid.70"""7172def __init__(self, S, x, check=True):73"""74Create the element ``x`` of the StringMonoid ``S``.7576This should typically be called by a StringMonoid.77"""78FreeMonoidElement.__init__(self, S, [])79if isinstance(x, list):80if check:81for b in x:82if not isinstance(b, (int, long, Integer)):83raise TypeError(84"x (= %s) must be a list of integers." % x)85self._element_list = list(x) # make copy86elif isinstance(x, str):87alphabet = list(self.parent().alphabet())88self._element_list = []89for i in range(len(x)):90try:91b = alphabet.index(x[i])92except ValueError:93raise TypeError(94"Argument x (= %s) is not a valid string." % x)95self._element_list += [b]96else:97raise TypeError("Argument x (= %s) is of the wrong type." % x)9899def __cmp__(left, right):100"""101Compare two free monoid elements with the same parents.102103The ordering is the one on the underlying sorted list of104(monomial,coefficients) pairs.105106EXAMPLES::107108sage: S = BinaryStrings()109sage: (x,y) = S.gens()110sage: x * y < y * x111True112sage: S("01") < S("10")113True114"""115return cmp(left._element_list, right._element_list)116117def _repr_(self):118"""119The self-representation of a string monoid element. Unlike Python120strings we print without the enclosing quotes.121"""122S = self.parent()123alphabet = S.alphabet()124s = ''125for b in self._element_list:126c = alphabet[b]127s += c128return s129130def _latex_(self):131"""132Return latex representation of self.133134EXAMPLES::135136sage: S = BinaryStrings()137sage: s = S('101111000')138sage: latex(s)139101111000140"""141return self._repr_()142143def __mul__(self, y):144"""145Multiply 2 free string monoid elements.146147EXAMPLES::148149sage: S = BinaryStrings()150sage: (x,y) = S.gens()151sage: x*y15201153"""154if not isinstance(y, StringMonoidElement):155raise TypeError("Argument y (= %s) is of wrong type." % y)156S = self.parent()157x_elt = self._element_list158y_elt = y._element_list159z = S('')160z._element_list = x_elt + y_elt161return z162163def __pow__(self, n):164"""165Return the `n`-th power of the string element.166167EXAMPLES::168169sage: (x,y) = BinaryStrings().gens()170sage: x**3 * y**5 * x**7171000111110000000172sage: x**0173174175Note that raising to a negative power is *not* a constructor176for an element of the corresponding free group (yet).177178::179180sage: x**(-1)181Traceback (most recent call last):182...183IndexError: Argument n (= -1) must be non-negative.184"""185if not isinstance(n, (int, long, Integer)):186raise TypeError("Argument n (= %s) must be an integer." % n)187if n < 0:188raise IndexError("Argument n (= %s) must be non-negative." % n)189elif n == 0:190return self.parent()('')191elif n == 1:192return self193z = self.parent()('')194z._element_list = self._element_list * n195return z196197def __len__(self):198"""199Return the number of products that occur in this monoid element.200For example, the length of the identity is 0, and the length201of the monoid `x_0^2x_1` is three.202203EXAMPLES::204205sage: S = BinaryStrings()206sage: z = S('')207sage: len(z)2080209sage: (x,y) = S.gens()210sage: len(x**2 * y**3)2115212"""213return len(self._element_list)214215def __iter__(self):216"""217Return an iterator over this element as a string.218219EXAMPLES::220221sage: t = AlphabeticStrings()('SHRUBBERY')222sage: t.__iter__().next()223S224sage: list(t)225[S, H, R, U, B, B, E, R, Y]226"""227l = len(self._element_list)228i=0229while i < l:230yield self[i]231i+=1232raise StopIteration233234def __getitem__(self, n):235"""236Return the n-th string character.237238EXAMPLES::239240sage: t = AlphabeticStrings()('SHRUBBERY')241sage: t[0]242S243sage: t[3]244U245sage: t[-1]246Y247"""248try:249c = self._element_list[n]250except:251raise IndexError("Argument n (= %s) is not a valid index." % n)252if not isinstance(c, list):253c = [c]254return self.parent()(c)255256def decoding(self, padic=False):257r"""258The byte string associated to a binary or hexadecimal string259monoid element.260261EXAMPLES::262263sage: S = HexadecimalStrings()264sage: s = S.encoding("A..Za..z"); s265412e2e5a612e2e7a266sage: s.decoding()267'A..Za..z'268sage: s = S.encoding("A..Za..z",padic=True); s26914e2e2a516e2e2a7270sage: s.decoding()271'\x14\xe2\xe2\xa5\x16\xe2\xe2\xa7'272sage: s.decoding(padic=True)273'A..Za..z'274sage: S = BinaryStrings()275sage: s = S.encoding("A..Za..z"); s2760100000100101110001011100101101001100001001011100010111001111010277sage: s.decoding()278'A..Za..z'279sage: s = S.encoding("A..Za..z",padic=True); s2801000001001110100011101000101101010000110011101000111010001011110281sage: s.decoding()282'\x82ttZ\x86tt^'283sage: s.decoding(padic=True)284'A..Za..z'285"""286S = self.parent()287from Crypto.Util.number import long_to_bytes288if isinstance(S, string_monoid.AlphabeticStringMonoid):289return ''.join([ long_to_bytes(65+i) for i in self._element_list ])290n = self.__len__()291if isinstance(S, string_monoid.HexadecimalStringMonoid):292if not n % 2 == 0:293"String %s must have even length to determine a byte character string." % str(self)294s = []295x = self._element_list296for k in range(n//2):297m = 2*k298if padic:299c = long_to_bytes(x[m]+16*x[m+1])300else:301c = long_to_bytes(16*x[m]+x[m+1])302s.append(c)303return ''.join(s)304if isinstance(S, string_monoid.BinaryStringMonoid):305if not n % 8 == 0:306"String %s must have even length 0 mod 8 to determine a byte character string." % str(self)307pows = [ 2**i for i in range(8) ]308s = []309x = self._element_list310for k in range(n//8):311m = 8*k312if padic:313c = long_to_bytes(sum([ x[m+i]*pows[i] for i in range(8) ]))314else:315c = long_to_bytes(sum([ x[m+7-i]*pows[i] for i in range(8) ]))316s.append(c)317return ''.join(s)318raise TypeError(319"Argument %s must be an alphabetic, binary, or hexadecimal string." % str(self))320321def coincidence_index(self, prec=0):322"""323Returns the probability of two randomly chosen characters being equal.324"""325if prec == 0:326RR = RealField()327else:328RR = RealField(prec)329char_dict = {}330for i in self._element_list:331# if char_dict.has_key(i):332# The method .has_key() has been deprecated since Python 2.2. Use333# "k in Dict" instead of "Dict.has_key(k)".334if i in char_dict:335char_dict[i] += 1336else:337char_dict[i] = 1338nn = 0339ci_num = 0340for i in char_dict.keys():341ni = char_dict[i]342nn += ni343ci_num += ni*(ni-1)344ci_den = nn*(nn-1)345return RR(ci_num)/ci_den346347def character_count(self):348r"""349Return the count of each unique character.350351EXAMPLES:352353Count the character frequency in an object comprised of capital354letters of the English alphabet::355356sage: M = AlphabeticStrings().encoding("abcabf")357sage: sorted(M.character_count().items())358[(A, 2), (B, 2), (C, 1), (F, 1)]359360In an object comprised of binary numbers::361362sage: M = BinaryStrings().encoding("abcabf")363sage: sorted(M.character_count().items())364[(0, 28), (1, 20)]365366In an object comprised of octal numbers::367368sage: A = OctalStrings()369sage: M = A([1, 2, 3, 2, 5, 3])370sage: sorted(M.character_count().items())371[(1, 1), (2, 2), (3, 2), (5, 1)]372373In an object comprised of hexadecimal numbers::374375sage: A = HexadecimalStrings()376sage: M = A([1, 2, 4, 6, 2, 4, 15])377sage: sorted(M.character_count().items())378[(1, 1), (2, 2), (4, 2), (6, 1), (f, 1)]379380In an object comprised of radix-64 characters::381382sage: A = Radix64Strings()383sage: M = A([1, 2, 63, 45, 45, 10]); M384BC/ttK385sage: sorted(M.character_count().items())386[(B, 1), (C, 1), (K, 1), (t, 2), (/, 1)]387388TESTS:389390Empty strings return no counts of character frequency::391392sage: M = AlphabeticStrings().encoding("")393sage: M.character_count()394{}395sage: M = BinaryStrings().encoding("")396sage: M.character_count()397{}398sage: A = OctalStrings()399sage: M = A([])400sage: M.character_count()401{}402sage: A = HexadecimalStrings()403sage: M = A([])404sage: M.character_count()405{}406sage: A = Radix64Strings()407sage: M = A([])408sage: M.character_count()409{}410"""411# the character frequency, i.e. the character count412CF = {}413for e in self:414if e in CF:415CF[e] += 1416else:417CF.setdefault(e, 1)418return CF419420def frequency_distribution(self, length=1, prec=0):421"""422Returns the probability space of character frequencies. The output423of this method is different from that of the method424:func:`characteristic_frequency()425<sage.monoids.string_monoid.AlphabeticStringMonoid.characteristic_frequency>`.426One can think of the characteristic frequency probability of an427element in an alphabet `A` as the expected probability of that element428occurring. Let `S` be a string encoded using elements of `A`. The429frequency probability distribution corresponding to `S` provides us430with the frequency probability of each element of `A` as observed431occurring in `S`. Thus one distribution provides expected432probabilities, while the other provides observed probabilities.433434INPUT:435436- ``length`` -- (default ``1``) if ``length=1`` then consider the437probability space of monogram frequency, i.e. probability438distribution of single characters. If ``length=2`` then consider439the probability space of digram frequency, i.e. probability440distribution of pairs of characters. This method currently441supports the generation of probability spaces for monogram442frequency (``length=1``) and digram frequency (``length=2``).443444- ``prec`` -- (default ``0``) a non-negative integer representing445the precision (in number of bits) of a floating-point number. The446default value ``prec=0`` means that we use 53 bits to represent447the mantissa of a floating-point number. For more information on448the precision of floating-point numbers, see the function449:func:`RealField() <sage.rings.real_mpfr.RealField>` or refer to the module450:mod:`real_mpfr <sage.rings.real_mpfr>`.451452EXAMPLES:453454Capital letters of the English alphabet::455456sage: M = AlphabeticStrings().encoding("abcd")457sage: L = M.frequency_distribution().function()458sage: sorted(L.items())459<BLANKLINE>460[(A, 0.250000000000000),461(B, 0.250000000000000),462(C, 0.250000000000000),463(D, 0.250000000000000)]464465The binary number system::466467sage: M = BinaryStrings().encoding("abcd")468sage: L = M.frequency_distribution().function()469sage: sorted(L.items())470[(0, 0.593750000000000), (1, 0.406250000000000)]471472The hexadecimal number system::473474sage: M = HexadecimalStrings().encoding("abcd")475sage: L = M.frequency_distribution().function()476sage: sorted(L.items())477<BLANKLINE>478[(1, 0.125000000000000),479(2, 0.125000000000000),480(3, 0.125000000000000),481(4, 0.125000000000000),482(6, 0.500000000000000)]483484Get the observed frequency probability distribution of digrams in the485string "ABCD". This string consists of the following digrams: "AB",486"BC", and "CD". Now find out the frequency probability of each of487these digrams as they occur in the string "ABCD"::488489sage: M = AlphabeticStrings().encoding("abcd")490sage: D = M.frequency_distribution(length=2).function()491sage: sorted(D.items())492[(AB, 0.333333333333333), (BC, 0.333333333333333), (CD, 0.333333333333333)]493"""494if not length in (1, 2):495raise NotImplementedError("Not implemented")496if prec == 0:497RR = RealField()498else:499RR = RealField(prec)500S = self.parent()501n = S.ngens()502if length == 1:503Alph = S.gens()504else:505Alph = tuple([ x*y for x in S.gens() for y in S.gens() ])506X = {}507N = len(self)-length+1508eps = RR(Integer(1)/N)509for i in range(N):510c = self[i:i+length]511# if X.has_key(c):512# The method .has_key() has been deprecated since Python 2.2. Use513# "k in Dict" instead of "Dict.has_key(k)".514if c in X:515X[c] += eps516else:517X[c] = eps518# Return a dictionary of probability distribution. This should519# allow for easier parsing of the dictionary.520return DiscreteProbabilitySpace(Alph, X, RR)521522523