Path: blob/master/src/sage/monoids/string_monoid_element.py
8815 views
"""1String Monoid Elements23AUTHORS:45- David Kohel <[email protected]>, 2007-0167Elements of free string monoids, internal representation subject to change.89These are special classes of free monoid elements with distinct printing.1011The internal representation of elements does not use the exponential12compression of FreeMonoid elements (a feature), and could be packed into words.13"""1415#*****************************************************************************16# Copyright (C) 2007 David Kohel <[email protected]>17#18# Distributed under the terms of the GNU General Public License (GPL)19#20# http://www.gnu.org/licenses/21#*****************************************************************************2223# import operator24from sage.rings.integer import Integer25from sage.rings.all import RealField26# from sage.structure.element import MonoidElement27from sage.probability.random_variable import DiscreteProbabilitySpace28from free_monoid_element import FreeMonoidElement29import string_monoid3031def is_StringMonoidElement(x):32r"""33"""34return isinstance(x, StringMonoidElement)3536def is_AlphabeticStringMonoidElement(x):37r"""38"""39return isinstance(x, StringMonoidElement) and \40isinstance(x.parent(), string_monoid.AlphabeticStringMonoid)4142def is_BinaryStringMonoidElement(x):43r"""44"""45return isinstance(x, StringMonoidElement) and \46isinstance(x.parent(), string_monoid.BinaryStringMonoid)4748def is_OctalStringMonoidElement(x):49r"""50"""51return isinstance(x, StringMonoidElement) and \52isinstance(x.parent(), string_monoid.OctalStringMonoid)5354def is_HexadecimalStringMonoidElement(x):55r"""56"""57return isinstance(x, StringMonoidElement) and \58isinstance(x.parent(), string_monoid.HexadecimalStringMonoid)5960def is_Radix64StringMonoidElement(x):61r"""62"""63return isinstance(x, StringMonoidElement) and \64isinstance(x.parent(), string_monoid.Radix64StringMonoid)656667class StringMonoidElement(FreeMonoidElement):68"""69Element of a free string monoid.70"""7172def __init__(self, S, x, check=True):73"""74Create the element ``x`` of the StringMonoid ``S``.7576This should typically be called by a StringMonoid.77"""78FreeMonoidElement.__init__(self, S, [])79if isinstance(x, list):80if check:81for b in x:82if not isinstance(b, (int, long, Integer)):83raise TypeError(84"x (= %s) must be a list of integers." % x)85self._element_list = list(x) # make copy86elif isinstance(x, str):87alphabet = list(self.parent().alphabet())88self._element_list = []89for i in range(len(x)):90try:91b = alphabet.index(x[i])92except ValueError:93raise TypeError(94"Argument x (= %s) is not a valid string." % x)95self._element_list += [b]96else:97raise TypeError("Argument x (= %s) is of the wrong type." % x)9899def __cmp__(left, right):100"""101Compare two free monoid elements with the same parents.102103The ordering is the one on the underlying sorted list of104(monomial,coefficients) pairs.105106EXAMPLES::107108sage: S = BinaryStrings()109sage: (x,y) = S.gens()110sage: x * y < y * x111True112sage: S("01") < S("10")113True114"""115return cmp(left._element_list, right._element_list)116117def _repr_(self):118"""119The self-representation of a string monoid element. Unlike Python120strings we print without the enclosing quotes.121"""122S = self.parent()123alphabet = S.alphabet()124s = ''125for b in self._element_list:126c = alphabet[b]127s += c128return s129130def _latex_(self):131"""132Return latex representation of self.133134EXAMPLES::135136sage: S = BinaryStrings()137sage: s = S('101111000')138sage: latex(s)139101111000140"""141return self._repr_()142143def __mul__(self, y):144"""145Multiply 2 free string monoid elements.146147EXAMPLES::148149sage: S = BinaryStrings()150sage: (x,y) = S.gens()151sage: x*y15201153"""154if not isinstance(y, StringMonoidElement):155raise TypeError("Argument y (= %s) is of wrong type." % y)156S = self.parent()157x_elt = self._element_list158y_elt = y._element_list159z = S('')160z._element_list = x_elt + y_elt161return z162163def __pow__(self, n):164"""165Return the `n`-th power of the string element.166167EXAMPLES::168169sage: (x,y) = BinaryStrings().gens()170sage: x**3 * y**5 * x**7171000111110000000172sage: x**0173174175Note that raising to a negative power is *not* a constructor176for an element of the corresponding free group (yet).177178::179180sage: x**(-1)181Traceback (most recent call last):182...183IndexError: Argument n (= -1) must be non-negative.184"""185if not isinstance(n, (int, long, Integer)):186raise TypeError("Argument n (= %s) must be an integer." % n)187if n < 0:188raise IndexError("Argument n (= %s) must be non-negative." % n)189elif n == 0:190return self.parent()('')191elif n == 1:192return self193z = self.parent()('')194z._element_list = self._element_list * n195return z196197def __len__(self):198"""199Return the number of products that occur in this monoid element.200For example, the length of the identity is 0, and the length201of the monoid `x_0^2x_1` is three.202203EXAMPLES::204205sage: S = BinaryStrings()206sage: z = S('')207sage: len(z)2080209sage: (x,y) = S.gens()210sage: len(x**2 * y**3)2115212"""213return len(self._element_list)214215def __iter__(self):216"""217Return an iterator over this element as a string.218219EXAMPLES::220221sage: t = AlphabeticStrings()('SHRUBBERY')222sage: t.__iter__().next()223S224sage: list(t)225[S, H, R, U, B, B, E, R, Y]226"""227l = len(self._element_list)228i=0229while i < l:230yield self[i]231i+=1232raise StopIteration233234def __getitem__(self, n):235"""236Return the n-th string character.237238EXAMPLES::239240sage: t = AlphabeticStrings()('SHRUBBERY')241sage: t[0]242S243sage: t[3]244U245sage: t[-1]246Y247"""248try:249c = self._element_list[n]250except Exception:251raise IndexError("Argument n (= %s) is not a valid index." % n)252if not isinstance(c, list):253c = [c]254return self.parent()(c)255256def decoding(self, padic=False):257r"""258The byte string associated to a binary or hexadecimal string259monoid element.260261EXAMPLES::262263sage: S = HexadecimalStrings()264sage: s = S.encoding("A..Za..z"); s265412e2e5a612e2e7a266sage: s.decoding()267'A..Za..z'268sage: s = S.encoding("A..Za..z",padic=True); s26914e2e2a516e2e2a7270sage: s.decoding()271'\x14\xe2\xe2\xa5\x16\xe2\xe2\xa7'272sage: s.decoding(padic=True)273'A..Za..z'274sage: S = BinaryStrings()275sage: s = S.encoding("A..Za..z"); s2760100000100101110001011100101101001100001001011100010111001111010277sage: s.decoding()278'A..Za..z'279sage: s = S.encoding("A..Za..z",padic=True); s2801000001001110100011101000101101010000110011101000111010001011110281sage: s.decoding()282'\x82ttZ\x86tt^'283sage: s.decoding(padic=True)284'A..Za..z'285"""286S = self.parent()287if isinstance(S, string_monoid.AlphabeticStringMonoid):288return ''.join([ chr(65+i) for i in self._element_list ])289n = self.__len__()290if isinstance(S, string_monoid.HexadecimalStringMonoid):291if not n % 2 == 0:292"String %s must have even length to determine a byte character string." % str(self)293s = []294x = self._element_list295for k in range(n//2):296m = 2*k297if padic:298c = chr(x[m]+16*x[m+1])299else:300c = chr(16*x[m]+x[m+1])301s.append(c)302return ''.join(s)303if isinstance(S, string_monoid.BinaryStringMonoid):304if not n % 8 == 0:305"String %s must have even length 0 mod 8 to determine a byte character string." % str(self)306pows = [ 2**i for i in range(8) ]307s = []308x = self._element_list309for k in range(n//8):310m = 8*k311if padic:312c = chr(sum([ x[m+i]*pows[i] for i in range(8) ]))313else:314c = chr(sum([ x[m+7-i]*pows[i] for i in range(8) ]))315s.append(c)316return ''.join(s)317raise TypeError(318"Argument %s must be an alphabetic, binary, or hexadecimal string." % str(self))319320def coincidence_index(self, prec=0):321"""322Returns the probability of two randomly chosen characters being equal.323"""324if prec == 0:325RR = RealField()326else:327RR = RealField(prec)328char_dict = {}329for i in self._element_list:330# if char_dict.has_key(i):331# The method .has_key() has been deprecated since Python 2.2. Use332# "k in Dict" instead of "Dict.has_key(k)".333if i in char_dict:334char_dict[i] += 1335else:336char_dict[i] = 1337nn = 0338ci_num = 0339for i in char_dict.keys():340ni = char_dict[i]341nn += ni342ci_num += ni*(ni-1)343ci_den = nn*(nn-1)344return RR(ci_num)/ci_den345346def character_count(self):347r"""348Return the count of each unique character.349350EXAMPLES:351352Count the character frequency in an object comprised of capital353letters of the English alphabet::354355sage: M = AlphabeticStrings().encoding("abcabf")356sage: sorted(M.character_count().items())357[(A, 2), (B, 2), (C, 1), (F, 1)]358359In an object comprised of binary numbers::360361sage: M = BinaryStrings().encoding("abcabf")362sage: sorted(M.character_count().items())363[(0, 28), (1, 20)]364365In an object comprised of octal numbers::366367sage: A = OctalStrings()368sage: M = A([1, 2, 3, 2, 5, 3])369sage: sorted(M.character_count().items())370[(1, 1), (2, 2), (3, 2), (5, 1)]371372In an object comprised of hexadecimal numbers::373374sage: A = HexadecimalStrings()375sage: M = A([1, 2, 4, 6, 2, 4, 15])376sage: sorted(M.character_count().items())377[(1, 1), (2, 2), (4, 2), (6, 1), (f, 1)]378379In an object comprised of radix-64 characters::380381sage: A = Radix64Strings()382sage: M = A([1, 2, 63, 45, 45, 10]); M383BC/ttK384sage: sorted(M.character_count().items())385[(B, 1), (C, 1), (K, 1), (t, 2), (/, 1)]386387TESTS:388389Empty strings return no counts of character frequency::390391sage: M = AlphabeticStrings().encoding("")392sage: M.character_count()393{}394sage: M = BinaryStrings().encoding("")395sage: M.character_count()396{}397sage: A = OctalStrings()398sage: M = A([])399sage: M.character_count()400{}401sage: A = HexadecimalStrings()402sage: M = A([])403sage: M.character_count()404{}405sage: A = Radix64Strings()406sage: M = A([])407sage: M.character_count()408{}409"""410# the character frequency, i.e. the character count411CF = {}412for e in self:413if e in CF:414CF[e] += 1415else:416CF.setdefault(e, 1)417return CF418419def frequency_distribution(self, length=1, prec=0):420"""421Returns the probability space of character frequencies. The output422of this method is different from that of the method423:func:`characteristic_frequency()424<sage.monoids.string_monoid.AlphabeticStringMonoid.characteristic_frequency>`.425One can think of the characteristic frequency probability of an426element in an alphabet `A` as the expected probability of that element427occurring. Let `S` be a string encoded using elements of `A`. The428frequency probability distribution corresponding to `S` provides us429with the frequency probability of each element of `A` as observed430occurring in `S`. Thus one distribution provides expected431probabilities, while the other provides observed probabilities.432433INPUT:434435- ``length`` -- (default ``1``) if ``length=1`` then consider the436probability space of monogram frequency, i.e. probability437distribution of single characters. If ``length=2`` then consider438the probability space of digram frequency, i.e. probability439distribution of pairs of characters. This method currently440supports the generation of probability spaces for monogram441frequency (``length=1``) and digram frequency (``length=2``).442443- ``prec`` -- (default ``0``) a non-negative integer representing444the precision (in number of bits) of a floating-point number. The445default value ``prec=0`` means that we use 53 bits to represent446the mantissa of a floating-point number. For more information on447the precision of floating-point numbers, see the function448:func:`RealField() <sage.rings.real_mpfr.RealField>` or refer to the module449:mod:`real_mpfr <sage.rings.real_mpfr>`.450451EXAMPLES:452453Capital letters of the English alphabet::454455sage: M = AlphabeticStrings().encoding("abcd")456sage: L = M.frequency_distribution().function()457sage: sorted(L.items())458<BLANKLINE>459[(A, 0.250000000000000),460(B, 0.250000000000000),461(C, 0.250000000000000),462(D, 0.250000000000000)]463464The binary number system::465466sage: M = BinaryStrings().encoding("abcd")467sage: L = M.frequency_distribution().function()468sage: sorted(L.items())469[(0, 0.593750000000000), (1, 0.406250000000000)]470471The hexadecimal number system::472473sage: M = HexadecimalStrings().encoding("abcd")474sage: L = M.frequency_distribution().function()475sage: sorted(L.items())476<BLANKLINE>477[(1, 0.125000000000000),478(2, 0.125000000000000),479(3, 0.125000000000000),480(4, 0.125000000000000),481(6, 0.500000000000000)]482483Get the observed frequency probability distribution of digrams in the484string "ABCD". This string consists of the following digrams: "AB",485"BC", and "CD". Now find out the frequency probability of each of486these digrams as they occur in the string "ABCD"::487488sage: M = AlphabeticStrings().encoding("abcd")489sage: D = M.frequency_distribution(length=2).function()490sage: sorted(D.items())491[(AB, 0.333333333333333), (BC, 0.333333333333333), (CD, 0.333333333333333)]492"""493if not length in (1, 2):494raise NotImplementedError("Not implemented")495if prec == 0:496RR = RealField()497else:498RR = RealField(prec)499S = self.parent()500n = S.ngens()501if length == 1:502Alph = S.gens()503else:504Alph = tuple([ x*y for x in S.gens() for y in S.gens() ])505X = {}506N = len(self)-length+1507eps = RR(Integer(1)/N)508for i in range(N):509c = self[i:i+length]510# if X.has_key(c):511# The method .has_key() has been deprecated since Python 2.2. Use512# "k in Dict" instead of "Dict.has_key(k)".513if c in X:514X[c] += eps515else:516X[c] = eps517# Return a dictionary of probability distribution. This should518# allow for easier parsing of the dictionary.519return DiscreteProbabilitySpace(Alph, X, RR)520521522