Path: blob/master/venv/Lib/site-packages/setuptools/_vendor/pyparsing.py
811 views
# module pyparsing.py1#2# Copyright (c) 2003-2018 Paul T. McGuire3#4# Permission is hereby granted, free of charge, to any person obtaining5# a copy of this software and associated documentation files (the6# "Software"), to deal in the Software without restriction, including7# without limitation the rights to use, copy, modify, merge, publish,8# distribute, sublicense, and/or sell copies of the Software, and to9# permit persons to whom the Software is furnished to do so, subject to10# the following conditions:11#12# The above copyright notice and this permission notice shall be13# included in all copies or substantial portions of the Software.14#15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,16# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF17# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.18# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY19# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,20# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE21# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.22#2324__doc__ = \25"""26pyparsing module - Classes and methods to define and execute parsing grammars27=============================================================================2829The pyparsing module is an alternative approach to creating and executing simple grammars,30vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you31don't need to learn a new syntax for defining grammars or matching expressions - the parsing module32provides a library of classes that you use to construct the grammar directly in Python.3334Here is a program to parse "Hello, World!" (or any greeting of the form35C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements36(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to37L{Literal} expressions)::3839from pyparsing import Word, alphas4041# define grammar of a greeting42greet = Word(alphas) + "," + Word(alphas) + "!"4344hello = "Hello, World!"45print (hello, "->", greet.parseString(hello))4647The program outputs the following::4849Hello, World! -> ['Hello', ',', 'World', '!']5051The Python representation of the grammar is quite readable, owing to the self-explanatory52class names, and the use of '+', '|' and '^' operators.5354The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an55object with named attributes.5657The pyparsing module handles some of the problems that are typically vexing when writing text parsers:58- extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)59- quoted strings60- embedded comments616263Getting Started -64-----------------65Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing66classes inherit from. Use the docstrings for examples of how to:67- construct literal match expressions from L{Literal} and L{CaselessLiteral} classes68- construct character word-group expressions using the L{Word} class69- see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes70- use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones71- associate names with your parsed results using L{ParserElement.setResultsName}72- find some helpful expression short-cuts like L{delimitedList} and L{oneOf}73- find more useful common expressions in the L{pyparsing_common} namespace class74"""7576__version__ = "2.2.1"77__versionTime__ = "18 Sep 2018 00:49 UTC"78__author__ = "Paul McGuire <[email protected]>"7980import string81from weakref import ref as wkref82import copy83import sys84import warnings85import re86import sre_constants87import collections88import pprint89import traceback90import types91from datetime import datetime9293try:94from _thread import RLock95except ImportError:96from threading import RLock9798try:99# Python 3100from collections.abc import Iterable101from collections.abc import MutableMapping102except ImportError:103# Python 2.7104from collections import Iterable105from collections import MutableMapping106107try:108from collections import OrderedDict as _OrderedDict109except ImportError:110try:111from ordereddict import OrderedDict as _OrderedDict112except ImportError:113_OrderedDict = None114115#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )116117__all__ = [118'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',119'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',120'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',121'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',122'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',123'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',124'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',125'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',126'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',127'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',128'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',129'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',130'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',131'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',132'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',133'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',134'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',135'CloseMatch', 'tokenMap', 'pyparsing_common',136]137138system_version = tuple(sys.version_info)[:3]139PY_3 = system_version[0] == 3140if PY_3:141_MAX_INT = sys.maxsize142basestring = str143unichr = chr144_ustr = str145146# build list of single arg builtins, that can be used as parse actions147singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]148149else:150_MAX_INT = sys.maxint151range = xrange152153def _ustr(obj):154"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries155str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It156then < returns the unicode object | encodes it with the default encoding | ... >.157"""158if isinstance(obj,unicode):159return obj160161try:162# If this works, then _ustr(obj) has the same behaviour as str(obj), so163# it won't break any existing code.164return str(obj)165166except UnicodeEncodeError:167# Else encode it168ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')169xmlcharref = Regex(r'&#\d+;')170xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])171return xmlcharref.transformString(ret)172173# build list of single arg builtins, tolerant of Python version, that can be used as parse actions174singleArgBuiltins = []175import __builtin__176for fname in "sum len sorted reversed list tuple set any all min max".split():177try:178singleArgBuiltins.append(getattr(__builtin__,fname))179except AttributeError:180continue181182_generatorType = type((y for y in range(1)))183184def _xml_escape(data):185"""Escape &, <, >, ", ', etc. in a string of data."""186187# ampersand must be replaced first188from_symbols = '&><"\''189to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())190for from_,to_ in zip(from_symbols, to_symbols):191data = data.replace(from_, to_)192return data193194class _Constants(object):195pass196197alphas = string.ascii_uppercase + string.ascii_lowercase198nums = "0123456789"199hexnums = nums + "ABCDEFabcdef"200alphanums = alphas + nums201_bslash = chr(92)202printables = "".join(c for c in string.printable if c not in string.whitespace)203204class ParseBaseException(Exception):205"""base exception class for all parsing runtime exceptions"""206# Performance tuning: we construct a *lot* of these, so keep this207# constructor as small and fast as possible208def __init__( self, pstr, loc=0, msg=None, elem=None ):209self.loc = loc210if msg is None:211self.msg = pstr212self.pstr = ""213else:214self.msg = msg215self.pstr = pstr216self.parserElement = elem217self.args = (pstr, loc, msg)218219@classmethod220def _from_exception(cls, pe):221"""222internal factory method to simplify creating one type of ParseException223from another - avoids having __init__ signature conflicts among subclasses224"""225return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)226227def __getattr__( self, aname ):228"""supported attributes by name are:229- lineno - returns the line number of the exception text230- col - returns the column number of the exception text231- line - returns the line containing the exception text232"""233if( aname == "lineno" ):234return lineno( self.loc, self.pstr )235elif( aname in ("col", "column") ):236return col( self.loc, self.pstr )237elif( aname == "line" ):238return line( self.loc, self.pstr )239else:240raise AttributeError(aname)241242def __str__( self ):243return "%s (at char %d), (line:%d, col:%d)" % \244( self.msg, self.loc, self.lineno, self.column )245def __repr__( self ):246return _ustr(self)247def markInputline( self, markerString = ">!<" ):248"""Extracts the exception line from the input string, and marks249the location of the exception with a special symbol.250"""251line_str = self.line252line_column = self.column - 1253if markerString:254line_str = "".join((line_str[:line_column],255markerString, line_str[line_column:]))256return line_str.strip()257def __dir__(self):258return "lineno col line".split() + dir(type(self))259260class ParseException(ParseBaseException):261"""262Exception thrown when parse expressions don't match class;263supported attributes by name are:264- lineno - returns the line number of the exception text265- col - returns the column number of the exception text266- line - returns the line containing the exception text267268Example::269try:270Word(nums).setName("integer").parseString("ABC")271except ParseException as pe:272print(pe)273print("column: {}".format(pe.col))274275prints::276Expected integer (at char 0), (line:1, col:1)277column: 1278"""279pass280281class ParseFatalException(ParseBaseException):282"""user-throwable exception thrown when inconsistent parse content283is found; stops all parsing immediately"""284pass285286class ParseSyntaxException(ParseFatalException):287"""just like L{ParseFatalException}, but thrown internally when an288L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop289immediately because an unbacktrackable syntax error has been found"""290pass291292#~ class ReparseException(ParseBaseException):293#~ """Experimental class - parse actions can raise this exception to cause294#~ pyparsing to reparse the input string:295#~ - with a modified input string, and/or296#~ - with a modified start location297#~ Set the values of the ReparseException in the constructor, and raise the298#~ exception in a parse action to cause pyparsing to use the new string/location.299#~ Setting the values as None causes no change to be made.300#~ """301#~ def __init_( self, newstring, restartLoc ):302#~ self.newParseText = newstring303#~ self.reparseLoc = restartLoc304305class RecursiveGrammarException(Exception):306"""exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""307def __init__( self, parseElementList ):308self.parseElementTrace = parseElementList309310def __str__( self ):311return "RecursiveGrammarException: %s" % self.parseElementTrace312313class _ParseResultsWithOffset(object):314def __init__(self,p1,p2):315self.tup = (p1,p2)316def __getitem__(self,i):317return self.tup[i]318def __repr__(self):319return repr(self.tup[0])320def setOffset(self,i):321self.tup = (self.tup[0],i)322323class ParseResults(object):324"""325Structured parse results, to provide multiple means of access to the parsed data:326- as a list (C{len(results)})327- by list index (C{results[0], results[1]}, etc.)328- by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})329330Example::331integer = Word(nums)332date_str = (integer.setResultsName("year") + '/'333+ integer.setResultsName("month") + '/'334+ integer.setResultsName("day"))335# equivalent form:336# date_str = integer("year") + '/' + integer("month") + '/' + integer("day")337338# parseString returns a ParseResults object339result = date_str.parseString("1999/12/31")340341def test(s, fn=repr):342print("%s -> %s" % (s, fn(eval(s))))343test("list(result)")344test("result[0]")345test("result['month']")346test("result.day")347test("'month' in result")348test("'minutes' in result")349test("result.dump()", str)350prints::351list(result) -> ['1999', '/', '12', '/', '31']352result[0] -> '1999'353result['month'] -> '12'354result.day -> '31'355'month' in result -> True356'minutes' in result -> False357result.dump() -> ['1999', '/', '12', '/', '31']358- day: 31359- month: 12360- year: 1999361"""362def __new__(cls, toklist=None, name=None, asList=True, modal=True ):363if isinstance(toklist, cls):364return toklist365retobj = object.__new__(cls)366retobj.__doinit = True367return retobj368369# Performance tuning: we construct a *lot* of these, so keep this370# constructor as small and fast as possible371def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):372if self.__doinit:373self.__doinit = False374self.__name = None375self.__parent = None376self.__accumNames = {}377self.__asList = asList378self.__modal = modal379if toklist is None:380toklist = []381if isinstance(toklist, list):382self.__toklist = toklist[:]383elif isinstance(toklist, _generatorType):384self.__toklist = list(toklist)385else:386self.__toklist = [toklist]387self.__tokdict = dict()388389if name is not None and name:390if not modal:391self.__accumNames[name] = 0392if isinstance(name,int):393name = _ustr(name) # will always return a str, but use _ustr for consistency394self.__name = name395if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):396if isinstance(toklist,basestring):397toklist = [ toklist ]398if asList:399if isinstance(toklist,ParseResults):400self[name] = _ParseResultsWithOffset(toklist.copy(),0)401else:402self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)403self[name].__name = name404else:405try:406self[name] = toklist[0]407except (KeyError,TypeError,IndexError):408self[name] = toklist409410def __getitem__( self, i ):411if isinstance( i, (int,slice) ):412return self.__toklist[i]413else:414if i not in self.__accumNames:415return self.__tokdict[i][-1][0]416else:417return ParseResults([ v[0] for v in self.__tokdict[i] ])418419def __setitem__( self, k, v, isinstance=isinstance ):420if isinstance(v,_ParseResultsWithOffset):421self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]422sub = v[0]423elif isinstance(k,(int,slice)):424self.__toklist[k] = v425sub = v426else:427self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]428sub = v429if isinstance(sub,ParseResults):430sub.__parent = wkref(self)431432def __delitem__( self, i ):433if isinstance(i,(int,slice)):434mylen = len( self.__toklist )435del self.__toklist[i]436437# convert int to slice438if isinstance(i, int):439if i < 0:440i += mylen441i = slice(i, i+1)442# get removed indices443removed = list(range(*i.indices(mylen)))444removed.reverse()445# fixup indices in token dictionary446for name,occurrences in self.__tokdict.items():447for j in removed:448for k, (value, position) in enumerate(occurrences):449occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))450else:451del self.__tokdict[i]452453def __contains__( self, k ):454return k in self.__tokdict455456def __len__( self ): return len( self.__toklist )457def __bool__(self): return ( not not self.__toklist )458__nonzero__ = __bool__459def __iter__( self ): return iter( self.__toklist )460def __reversed__( self ): return iter( self.__toklist[::-1] )461def _iterkeys( self ):462if hasattr(self.__tokdict, "iterkeys"):463return self.__tokdict.iterkeys()464else:465return iter(self.__tokdict)466467def _itervalues( self ):468return (self[k] for k in self._iterkeys())469470def _iteritems( self ):471return ((k, self[k]) for k in self._iterkeys())472473if PY_3:474keys = _iterkeys475"""Returns an iterator of all named result keys (Python 3.x only)."""476477values = _itervalues478"""Returns an iterator of all named result values (Python 3.x only)."""479480items = _iteritems481"""Returns an iterator of all named result key-value tuples (Python 3.x only)."""482483else:484iterkeys = _iterkeys485"""Returns an iterator of all named result keys (Python 2.x only)."""486487itervalues = _itervalues488"""Returns an iterator of all named result values (Python 2.x only)."""489490iteritems = _iteritems491"""Returns an iterator of all named result key-value tuples (Python 2.x only)."""492493def keys( self ):494"""Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""495return list(self.iterkeys())496497def values( self ):498"""Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""499return list(self.itervalues())500501def items( self ):502"""Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""503return list(self.iteritems())504505def haskeys( self ):506"""Since keys() returns an iterator, this method is helpful in bypassing507code that looks for the existence of any defined results names."""508return bool(self.__tokdict)509510def pop( self, *args, **kwargs):511"""512Removes and returns item at specified index (default=C{last}).513Supports both C{list} and C{dict} semantics for C{pop()}. If passed no514argument or an integer argument, it will use C{list} semantics515and pop tokens from the list of parsed tokens. If passed a516non-integer argument (most likely a string), it will use C{dict}517semantics and pop the corresponding value from any defined518results names. A second default return value argument is519supported, just as in C{dict.pop()}.520521Example::522def remove_first(tokens):523tokens.pop(0)524print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']525print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']526527label = Word(alphas)528patt = label("LABEL") + OneOrMore(Word(nums))529print(patt.parseString("AAB 123 321").dump())530531# Use pop() in a parse action to remove named result (note that corresponding value is not532# removed from list form of results)533def remove_LABEL(tokens):534tokens.pop("LABEL")535return tokens536patt.addParseAction(remove_LABEL)537print(patt.parseString("AAB 123 321").dump())538prints::539['AAB', '123', '321']540- LABEL: AAB541542['AAB', '123', '321']543"""544if not args:545args = [-1]546for k,v in kwargs.items():547if k == 'default':548args = (args[0], v)549else:550raise TypeError("pop() got an unexpected keyword argument '%s'" % k)551if (isinstance(args[0], int) or552len(args) == 1 or553args[0] in self):554index = args[0]555ret = self[index]556del self[index]557return ret558else:559defaultvalue = args[1]560return defaultvalue561562def get(self, key, defaultValue=None):563"""564Returns named result matching the given key, or if there is no565such name, then returns the given C{defaultValue} or C{None} if no566C{defaultValue} is specified.567568Similar to C{dict.get()}.569570Example::571integer = Word(nums)572date_str = integer("year") + '/' + integer("month") + '/' + integer("day")573574result = date_str.parseString("1999/12/31")575print(result.get("year")) # -> '1999'576print(result.get("hour", "not specified")) # -> 'not specified'577print(result.get("hour")) # -> None578"""579if key in self:580return self[key]581else:582return defaultValue583584def insert( self, index, insStr ):585"""586Inserts new element at location index in the list of parsed tokens.587588Similar to C{list.insert()}.589590Example::591print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']592593# use a parse action to insert the parse location in the front of the parsed results594def insert_locn(locn, tokens):595tokens.insert(0, locn)596print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']597"""598self.__toklist.insert(index, insStr)599# fixup indices in token dictionary600for name,occurrences in self.__tokdict.items():601for k, (value, position) in enumerate(occurrences):602occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))603604def append( self, item ):605"""606Add single element to end of ParseResults list of elements.607608Example::609print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']610611# use a parse action to compute the sum of the parsed integers, and add it to the end612def append_sum(tokens):613tokens.append(sum(map(int, tokens)))614print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]615"""616self.__toklist.append(item)617618def extend( self, itemseq ):619"""620Add sequence of elements to end of ParseResults list of elements.621622Example::623patt = OneOrMore(Word(alphas))624625# use a parse action to append the reverse of the matched strings, to make a palindrome626def make_palindrome(tokens):627tokens.extend(reversed([t[::-1] for t in tokens]))628return ''.join(tokens)629print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'630"""631if isinstance(itemseq, ParseResults):632self += itemseq633else:634self.__toklist.extend(itemseq)635636def clear( self ):637"""638Clear all elements and results names.639"""640del self.__toklist[:]641self.__tokdict.clear()642643def __getattr__( self, name ):644try:645return self[name]646except KeyError:647return ""648649if name in self.__tokdict:650if name not in self.__accumNames:651return self.__tokdict[name][-1][0]652else:653return ParseResults([ v[0] for v in self.__tokdict[name] ])654else:655return ""656657def __add__( self, other ):658ret = self.copy()659ret += other660return ret661662def __iadd__( self, other ):663if other.__tokdict:664offset = len(self.__toklist)665addoffset = lambda a: offset if a<0 else a+offset666otheritems = other.__tokdict.items()667otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )668for (k,vlist) in otheritems for v in vlist]669for k,v in otherdictitems:670self[k] = v671if isinstance(v[0],ParseResults):672v[0].__parent = wkref(self)673674self.__toklist += other.__toklist675self.__accumNames.update( other.__accumNames )676return self677678def __radd__(self, other):679if isinstance(other,int) and other == 0:680# useful for merging many ParseResults using sum() builtin681return self.copy()682else:683# this may raise a TypeError - so be it684return other + self685686def __repr__( self ):687return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )688689def __str__( self ):690return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'691692def _asStringList( self, sep='' ):693out = []694for item in self.__toklist:695if out and sep:696out.append(sep)697if isinstance( item, ParseResults ):698out += item._asStringList()699else:700out.append( _ustr(item) )701return out702703def asList( self ):704"""705Returns the parse results as a nested list of matching tokens, all converted to strings.706707Example::708patt = OneOrMore(Word(alphas))709result = patt.parseString("sldkj lsdkj sldkj")710# even though the result prints in string-like form, it is actually a pyparsing ParseResults711print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']712713# Use asList() to create an actual list714result_list = result.asList()715print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']716"""717return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]718719def asDict( self ):720"""721Returns the named parse results as a nested dictionary.722723Example::724integer = Word(nums)725date_str = integer("year") + '/' + integer("month") + '/' + integer("day")726727result = date_str.parseString('12/31/1999')728print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})729730result_dict = result.asDict()731print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}732733# even though a ParseResults supports dict-like access, sometime you just need to have a dict734import json735print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable736print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}737"""738if PY_3:739item_fn = self.items740else:741item_fn = self.iteritems742743def toItem(obj):744if isinstance(obj, ParseResults):745if obj.haskeys():746return obj.asDict()747else:748return [toItem(v) for v in obj]749else:750return obj751752return dict((k,toItem(v)) for k,v in item_fn())753754def copy( self ):755"""756Returns a new copy of a C{ParseResults} object.757"""758ret = ParseResults( self.__toklist )759ret.__tokdict = self.__tokdict.copy()760ret.__parent = self.__parent761ret.__accumNames.update( self.__accumNames )762ret.__name = self.__name763return ret764765def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):766"""767(Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.768"""769nl = "\n"770out = []771namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()772for v in vlist)773nextLevelIndent = indent + " "774775# collapse out indents if formatting is not desired776if not formatted:777indent = ""778nextLevelIndent = ""779nl = ""780781selfTag = None782if doctag is not None:783selfTag = doctag784else:785if self.__name:786selfTag = self.__name787788if not selfTag:789if namedItemsOnly:790return ""791else:792selfTag = "ITEM"793794out += [ nl, indent, "<", selfTag, ">" ]795796for i,res in enumerate(self.__toklist):797if isinstance(res,ParseResults):798if i in namedItems:799out += [ res.asXML(namedItems[i],800namedItemsOnly and doctag is None,801nextLevelIndent,802formatted)]803else:804out += [ res.asXML(None,805namedItemsOnly and doctag is None,806nextLevelIndent,807formatted)]808else:809# individual token, see if there is a name for it810resTag = None811if i in namedItems:812resTag = namedItems[i]813if not resTag:814if namedItemsOnly:815continue816else:817resTag = "ITEM"818xmlBodyText = _xml_escape(_ustr(res))819out += [ nl, nextLevelIndent, "<", resTag, ">",820xmlBodyText,821"</", resTag, ">" ]822823out += [ nl, indent, "</", selfTag, ">" ]824return "".join(out)825826def __lookup(self,sub):827for k,vlist in self.__tokdict.items():828for v,loc in vlist:829if sub is v:830return k831return None832833def getName(self):834r"""835Returns the results name for this token expression. Useful when several836different expressions might match at a particular location.837838Example::839integer = Word(nums)840ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")841house_number_expr = Suppress('#') + Word(nums, alphanums)842user_data = (Group(house_number_expr)("house_number")843| Group(ssn_expr)("ssn")844| Group(integer)("age"))845user_info = OneOrMore(user_data)846847result = user_info.parseString("22 111-22-3333 #221B")848for item in result:849print(item.getName(), ':', item[0])850prints::851age : 22852ssn : 111-22-3333853house_number : 221B854"""855if self.__name:856return self.__name857elif self.__parent:858par = self.__parent()859if par:860return par.__lookup(self)861else:862return None863elif (len(self) == 1 and864len(self.__tokdict) == 1 and865next(iter(self.__tokdict.values()))[0][1] in (0,-1)):866return next(iter(self.__tokdict.keys()))867else:868return None869870def dump(self, indent='', depth=0, full=True):871"""872Diagnostic method for listing out the contents of a C{ParseResults}.873Accepts an optional C{indent} argument so that this string can be embedded874in a nested display of other data.875876Example::877integer = Word(nums)878date_str = integer("year") + '/' + integer("month") + '/' + integer("day")879880result = date_str.parseString('12/31/1999')881print(result.dump())882prints::883['12', '/', '31', '/', '1999']884- day: 1999885- month: 31886- year: 12887"""888out = []889NL = '\n'890out.append( indent+_ustr(self.asList()) )891if full:892if self.haskeys():893items = sorted((str(k), v) for k,v in self.items())894for k,v in items:895if out:896out.append(NL)897out.append( "%s%s- %s: " % (indent,(' '*depth), k) )898if isinstance(v,ParseResults):899if v:900out.append( v.dump(indent,depth+1) )901else:902out.append(_ustr(v))903else:904out.append(repr(v))905elif any(isinstance(vv,ParseResults) for vv in self):906v = self907for i,vv in enumerate(v):908if isinstance(vv,ParseResults):909out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))910else:911out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))912913return "".join(out)914915def pprint(self, *args, **kwargs):916"""917Pretty-printer for parsed results as a list, using the C{pprint} module.918Accepts additional positional or keyword args as defined for the919C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})920921Example::922ident = Word(alphas, alphanums)923num = Word(nums)924func = Forward()925term = ident | num | Group('(' + func + ')')926func <<= ident + Group(Optional(delimitedList(term)))927result = func.parseString("fna a,b,(fnb c,d,200),100")928result.pprint(width=40)929prints::930['fna',931['a',932'b',933['(', 'fnb', ['c', 'd', '200'], ')'],934'100']]935"""936pprint.pprint(self.asList(), *args, **kwargs)937938# add support for pickle protocol939def __getstate__(self):940return ( self.__toklist,941( self.__tokdict.copy(),942self.__parent is not None and self.__parent() or None,943self.__accumNames,944self.__name ) )945946def __setstate__(self,state):947self.__toklist = state[0]948(self.__tokdict,949par,950inAccumNames,951self.__name) = state[1]952self.__accumNames = {}953self.__accumNames.update(inAccumNames)954if par is not None:955self.__parent = wkref(par)956else:957self.__parent = None958959def __getnewargs__(self):960return self.__toklist, self.__name, self.__asList, self.__modal961962def __dir__(self):963return (dir(type(self)) + list(self.keys()))964965MutableMapping.register(ParseResults)966967def col (loc,strg):968"""Returns current column within a string, counting newlines as line separators.969The first column is number 1.970971Note: the default parsing behavior is to expand tabs in the input string972before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information973on parsing strings containing C{<TAB>}s, and suggested methods to maintain a974consistent view of the parsed string, the parse location, and line and column975positions within the parsed string.976"""977s = strg978return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)979980def lineno(loc,strg):981"""Returns current line number within a string, counting newlines as line separators.982The first line is number 1.983984Note: the default parsing behavior is to expand tabs in the input string985before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information986on parsing strings containing C{<TAB>}s, and suggested methods to maintain a987consistent view of the parsed string, the parse location, and line and column988positions within the parsed string.989"""990return strg.count("\n",0,loc) + 1991992def line( loc, strg ):993"""Returns the line of text containing loc within a string, counting newlines as line separators.994"""995lastCR = strg.rfind("\n", 0, loc)996nextCR = strg.find("\n", loc)997if nextCR >= 0:998return strg[lastCR+1:nextCR]999else:1000return strg[lastCR+1:]10011002def _defaultStartDebugAction( instring, loc, expr ):1003print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))10041005def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):1006print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))10071008def _defaultExceptionDebugAction( instring, loc, expr, exc ):1009print ("Exception raised:" + _ustr(exc))10101011def nullDebugAction(*args):1012"""'Do-nothing' debug action, to suppress debugging output during parsing."""1013pass10141015# Only works on Python 3.x - nonlocal is toxic to Python 2 installs1016#~ 'decorator to trim function calls to match the arity of the target'1017#~ def _trim_arity(func, maxargs=3):1018#~ if func in singleArgBuiltins:1019#~ return lambda s,l,t: func(t)1020#~ limit = 01021#~ foundArity = False1022#~ def wrapper(*args):1023#~ nonlocal limit,foundArity1024#~ while 1:1025#~ try:1026#~ ret = func(*args[limit:])1027#~ foundArity = True1028#~ return ret1029#~ except TypeError:1030#~ if limit == maxargs or foundArity:1031#~ raise1032#~ limit += 11033#~ continue1034#~ return wrapper10351036# this version is Python 2.x-3.x cross-compatible1037'decorator to trim function calls to match the arity of the target'1038def _trim_arity(func, maxargs=2):1039if func in singleArgBuiltins:1040return lambda s,l,t: func(t)1041limit = [0]1042foundArity = [False]10431044# traceback return data structure changed in Py3.5 - normalize back to plain tuples1045if system_version[:2] >= (3,5):1046def extract_stack(limit=0):1047# special handling for Python 3.5.0 - extra deep call stack by 11048offset = -3 if system_version == (3,5,0) else -21049frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]1050return [frame_summary[:2]]1051def extract_tb(tb, limit=0):1052frames = traceback.extract_tb(tb, limit=limit)1053frame_summary = frames[-1]1054return [frame_summary[:2]]1055else:1056extract_stack = traceback.extract_stack1057extract_tb = traceback.extract_tb10581059# synthesize what would be returned by traceback.extract_stack at the call to1060# user's parse action 'func', so that we don't incur call penalty at parse time10611062LINE_DIFF = 61063# IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND1064# THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!1065this_line = extract_stack(limit=2)[-1]1066pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)10671068def wrapper(*args):1069while 1:1070try:1071ret = func(*args[limit[0]:])1072foundArity[0] = True1073return ret1074except TypeError:1075# re-raise TypeErrors if they did not come from our arity testing1076if foundArity[0]:1077raise1078else:1079try:1080tb = sys.exc_info()[-1]1081if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:1082raise1083finally:1084del tb10851086if limit[0] <= maxargs:1087limit[0] += 11088continue1089raise10901091# copy func name to wrapper for sensible debug output1092func_name = "<parse action>"1093try:1094func_name = getattr(func, '__name__',1095getattr(func, '__class__').__name__)1096except Exception:1097func_name = str(func)1098wrapper.__name__ = func_name10991100return wrapper11011102class ParserElement(object):1103"""Abstract base level parser element class."""1104DEFAULT_WHITE_CHARS = " \n\t\r"1105verbose_stacktrace = False11061107@staticmethod1108def setDefaultWhitespaceChars( chars ):1109r"""1110Overrides the default whitespace chars11111112Example::1113# default whitespace chars are space, <TAB> and newline1114OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']11151116# change to just treat newline as significant1117ParserElement.setDefaultWhitespaceChars(" \t")1118OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']1119"""1120ParserElement.DEFAULT_WHITE_CHARS = chars11211122@staticmethod1123def inlineLiteralsUsing(cls):1124"""1125Set class to be used for inclusion of string literals into a parser.11261127Example::1128# default literal class used is Literal1129integer = Word(nums)1130date_str = integer("year") + '/' + integer("month") + '/' + integer("day")11311132date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']113311341135# change to Suppress1136ParserElement.inlineLiteralsUsing(Suppress)1137date_str = integer("year") + '/' + integer("month") + '/' + integer("day")11381139date_str.parseString("1999/12/31") # -> ['1999', '12', '31']1140"""1141ParserElement._literalStringClass = cls11421143def __init__( self, savelist=False ):1144self.parseAction = list()1145self.failAction = None1146#~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall1147self.strRepr = None1148self.resultsName = None1149self.saveAsList = savelist1150self.skipWhitespace = True1151self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS1152self.copyDefaultWhiteChars = True1153self.mayReturnEmpty = False # used when checking for left-recursion1154self.keepTabs = False1155self.ignoreExprs = list()1156self.debug = False1157self.streamlined = False1158self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index1159self.errmsg = ""1160self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)1161self.debugActions = ( None, None, None ) #custom debug actions1162self.re = None1163self.callPreparse = True # used to avoid redundant calls to preParse1164self.callDuringTry = False11651166def copy( self ):1167"""1168Make a copy of this C{ParserElement}. Useful for defining different parse actions1169for the same parsing pattern, using copies of the original parse element.11701171Example::1172integer = Word(nums).setParseAction(lambda toks: int(toks[0]))1173integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")1174integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")11751176print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))1177prints::1178[5120, 100, 655360, 268435456]1179Equivalent form of C{expr.copy()} is just C{expr()}::1180integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")1181"""1182cpy = copy.copy( self )1183cpy.parseAction = self.parseAction[:]1184cpy.ignoreExprs = self.ignoreExprs[:]1185if self.copyDefaultWhiteChars:1186cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS1187return cpy11881189def setName( self, name ):1190"""1191Define name for this expression, makes debugging and exception messages clearer.11921193Example::1194Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)1195Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)1196"""1197self.name = name1198self.errmsg = "Expected " + self.name1199if hasattr(self,"exception"):1200self.exception.msg = self.errmsg1201return self12021203def setResultsName( self, name, listAllMatches=False ):1204"""1205Define name for referencing matching tokens as a nested attribute1206of the returned parse results.1207NOTE: this returns a *copy* of the original C{ParserElement} object;1208this is so that the client can define a basic element, such as an1209integer, and reference it in multiple places with different names.12101211You can also set results names using the abbreviated syntax,1212C{expr("name")} in place of C{expr.setResultsName("name")} -1213see L{I{__call__}<__call__>}.12141215Example::1216date_str = (integer.setResultsName("year") + '/'1217+ integer.setResultsName("month") + '/'1218+ integer.setResultsName("day"))12191220# equivalent form:1221date_str = integer("year") + '/' + integer("month") + '/' + integer("day")1222"""1223newself = self.copy()1224if name.endswith("*"):1225name = name[:-1]1226listAllMatches=True1227newself.resultsName = name1228newself.modalResults = not listAllMatches1229return newself12301231def setBreak(self,breakFlag = True):1232"""Method to invoke the Python pdb debugger when this element is1233about to be parsed. Set C{breakFlag} to True to enable, False to1234disable.1235"""1236if breakFlag:1237_parseMethod = self._parse1238def breaker(instring, loc, doActions=True, callPreParse=True):1239import pdb1240pdb.set_trace()1241return _parseMethod( instring, loc, doActions, callPreParse )1242breaker._originalParseMethod = _parseMethod1243self._parse = breaker1244else:1245if hasattr(self._parse,"_originalParseMethod"):1246self._parse = self._parse._originalParseMethod1247return self12481249def setParseAction( self, *fns, **kwargs ):1250"""1251Define one or more actions to perform when successfully matching parse element definition.1252Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},1253C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:1254- s = the original string being parsed (see note below)1255- loc = the location of the matching substring1256- toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object1257If the functions in fns modify the tokens, they can return them as the return1258value from fn, and the modified list of tokens will replace the original.1259Otherwise, fn does not need to return any value.12601261Optional keyword arguments:1262- callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing12631264Note: the default parsing behavior is to expand tabs in the input string1265before starting the parsing process. See L{I{parseString}<parseString>} for more information1266on parsing strings containing C{<TAB>}s, and suggested methods to maintain a1267consistent view of the parsed string, the parse location, and line and column1268positions within the parsed string.12691270Example::1271integer = Word(nums)1272date_str = integer + '/' + integer + '/' + integer12731274date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']12751276# use parse action to convert to ints at parse time1277integer = Word(nums).setParseAction(lambda toks: int(toks[0]))1278date_str = integer + '/' + integer + '/' + integer12791280# note that integer fields are now ints, not strings1281date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]1282"""1283self.parseAction = list(map(_trim_arity, list(fns)))1284self.callDuringTry = kwargs.get("callDuringTry", False)1285return self12861287def addParseAction( self, *fns, **kwargs ):1288"""1289Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.12901291See examples in L{I{copy}<copy>}.1292"""1293self.parseAction += list(map(_trim_arity, list(fns)))1294self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)1295return self12961297def addCondition(self, *fns, **kwargs):1298"""Add a boolean predicate function to expression's list of parse actions. See1299L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},1300functions passed to C{addCondition} need to return boolean success/fail of the condition.13011302Optional keyword arguments:1303- message = define a custom message to be used in the raised exception1304- fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException13051306Example::1307integer = Word(nums).setParseAction(lambda toks: int(toks[0]))1308year_int = integer.copy()1309year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")1310date_str = year_int + '/' + integer + '/' + integer13111312result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)1313"""1314msg = kwargs.get("message", "failed user-defined condition")1315exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException1316for fn in fns:1317def pa(s,l,t):1318if not bool(_trim_arity(fn)(s,l,t)):1319raise exc_type(s,l,msg)1320self.parseAction.append(pa)1321self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)1322return self13231324def setFailAction( self, fn ):1325"""Define action to perform if parsing fails at this expression.1326Fail acton fn is a callable function that takes the arguments1327C{fn(s,loc,expr,err)} where:1328- s = string being parsed1329- loc = location where expression match was attempted and failed1330- expr = the parse expression that failed1331- err = the exception thrown1332The function returns no value. It may throw C{L{ParseFatalException}}1333if it is desired to stop parsing immediately."""1334self.failAction = fn1335return self13361337def _skipIgnorables( self, instring, loc ):1338exprsFound = True1339while exprsFound:1340exprsFound = False1341for e in self.ignoreExprs:1342try:1343while 1:1344loc,dummy = e._parse( instring, loc )1345exprsFound = True1346except ParseException:1347pass1348return loc13491350def preParse( self, instring, loc ):1351if self.ignoreExprs:1352loc = self._skipIgnorables( instring, loc )13531354if self.skipWhitespace:1355wt = self.whiteChars1356instrlen = len(instring)1357while loc < instrlen and instring[loc] in wt:1358loc += 113591360return loc13611362def parseImpl( self, instring, loc, doActions=True ):1363return loc, []13641365def postParse( self, instring, loc, tokenlist ):1366return tokenlist13671368#~ @profile1369def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):1370debugging = ( self.debug ) #and doActions )13711372if debugging or self.failAction:1373#~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))1374if (self.debugActions[0] ):1375self.debugActions[0]( instring, loc, self )1376if callPreParse and self.callPreparse:1377preloc = self.preParse( instring, loc )1378else:1379preloc = loc1380tokensStart = preloc1381try:1382try:1383loc,tokens = self.parseImpl( instring, preloc, doActions )1384except IndexError:1385raise ParseException( instring, len(instring), self.errmsg, self )1386except ParseBaseException as err:1387#~ print ("Exception raised:", err)1388if self.debugActions[2]:1389self.debugActions[2]( instring, tokensStart, self, err )1390if self.failAction:1391self.failAction( instring, tokensStart, self, err )1392raise1393else:1394if callPreParse and self.callPreparse:1395preloc = self.preParse( instring, loc )1396else:1397preloc = loc1398tokensStart = preloc1399if self.mayIndexError or preloc >= len(instring):1400try:1401loc,tokens = self.parseImpl( instring, preloc, doActions )1402except IndexError:1403raise ParseException( instring, len(instring), self.errmsg, self )1404else:1405loc,tokens = self.parseImpl( instring, preloc, doActions )14061407tokens = self.postParse( instring, loc, tokens )14081409retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )1410if self.parseAction and (doActions or self.callDuringTry):1411if debugging:1412try:1413for fn in self.parseAction:1414tokens = fn( instring, tokensStart, retTokens )1415if tokens is not None:1416retTokens = ParseResults( tokens,1417self.resultsName,1418asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),1419modal=self.modalResults )1420except ParseBaseException as err:1421#~ print "Exception raised in user parse action:", err1422if (self.debugActions[2] ):1423self.debugActions[2]( instring, tokensStart, self, err )1424raise1425else:1426for fn in self.parseAction:1427tokens = fn( instring, tokensStart, retTokens )1428if tokens is not None:1429retTokens = ParseResults( tokens,1430self.resultsName,1431asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),1432modal=self.modalResults )1433if debugging:1434#~ print ("Matched",self,"->",retTokens.asList())1435if (self.debugActions[1] ):1436self.debugActions[1]( instring, tokensStart, loc, self, retTokens )14371438return loc, retTokens14391440def tryParse( self, instring, loc ):1441try:1442return self._parse( instring, loc, doActions=False )[0]1443except ParseFatalException:1444raise ParseException( instring, loc, self.errmsg, self)14451446def canParseNext(self, instring, loc):1447try:1448self.tryParse(instring, loc)1449except (ParseException, IndexError):1450return False1451else:1452return True14531454class _UnboundedCache(object):1455def __init__(self):1456cache = {}1457self.not_in_cache = not_in_cache = object()14581459def get(self, key):1460return cache.get(key, not_in_cache)14611462def set(self, key, value):1463cache[key] = value14641465def clear(self):1466cache.clear()14671468def cache_len(self):1469return len(cache)14701471self.get = types.MethodType(get, self)1472self.set = types.MethodType(set, self)1473self.clear = types.MethodType(clear, self)1474self.__len__ = types.MethodType(cache_len, self)14751476if _OrderedDict is not None:1477class _FifoCache(object):1478def __init__(self, size):1479self.not_in_cache = not_in_cache = object()14801481cache = _OrderedDict()14821483def get(self, key):1484return cache.get(key, not_in_cache)14851486def set(self, key, value):1487cache[key] = value1488while len(cache) > size:1489try:1490cache.popitem(False)1491except KeyError:1492pass14931494def clear(self):1495cache.clear()14961497def cache_len(self):1498return len(cache)14991500self.get = types.MethodType(get, self)1501self.set = types.MethodType(set, self)1502self.clear = types.MethodType(clear, self)1503self.__len__ = types.MethodType(cache_len, self)15041505else:1506class _FifoCache(object):1507def __init__(self, size):1508self.not_in_cache = not_in_cache = object()15091510cache = {}1511key_fifo = collections.deque([], size)15121513def get(self, key):1514return cache.get(key, not_in_cache)15151516def set(self, key, value):1517cache[key] = value1518while len(key_fifo) > size:1519cache.pop(key_fifo.popleft(), None)1520key_fifo.append(key)15211522def clear(self):1523cache.clear()1524key_fifo.clear()15251526def cache_len(self):1527return len(cache)15281529self.get = types.MethodType(get, self)1530self.set = types.MethodType(set, self)1531self.clear = types.MethodType(clear, self)1532self.__len__ = types.MethodType(cache_len, self)15331534# argument cache for optimizing repeated calls when backtracking through recursive expressions1535packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail1536packrat_cache_lock = RLock()1537packrat_cache_stats = [0, 0]15381539# this method gets repeatedly called during backtracking with the same arguments -1540# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression1541def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):1542HIT, MISS = 0, 11543lookup = (self, instring, loc, callPreParse, doActions)1544with ParserElement.packrat_cache_lock:1545cache = ParserElement.packrat_cache1546value = cache.get(lookup)1547if value is cache.not_in_cache:1548ParserElement.packrat_cache_stats[MISS] += 11549try:1550value = self._parseNoCache(instring, loc, doActions, callPreParse)1551except ParseBaseException as pe:1552# cache a copy of the exception, without the traceback1553cache.set(lookup, pe.__class__(*pe.args))1554raise1555else:1556cache.set(lookup, (value[0], value[1].copy()))1557return value1558else:1559ParserElement.packrat_cache_stats[HIT] += 11560if isinstance(value, Exception):1561raise value1562return (value[0], value[1].copy())15631564_parse = _parseNoCache15651566@staticmethod1567def resetCache():1568ParserElement.packrat_cache.clear()1569ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)15701571_packratEnabled = False1572@staticmethod1573def enablePackrat(cache_size_limit=128):1574"""Enables "packrat" parsing, which adds memoizing to the parsing logic.1575Repeated parse attempts at the same string location (which happens1576often in many complex grammars) can immediately return a cached value,1577instead of re-executing parsing/validating code. Memoizing is done of1578both valid results and parsing exceptions.15791580Parameters:1581- cache_size_limit - (default=C{128}) - if an integer value is provided1582will limit the size of the packrat cache; if None is passed, then1583the cache size will be unbounded; if 0 is passed, the cache will1584be effectively disabled.15851586This speedup may break existing programs that use parse actions that1587have side-effects. For this reason, packrat parsing is disabled when1588you first import pyparsing. To activate the packrat feature, your1589program must call the class method C{ParserElement.enablePackrat()}. If1590your program uses C{psyco} to "compile as you go", you must call1591C{enablePackrat} before calling C{psyco.full()}. If you do not do this,1592Python will crash. For best results, call C{enablePackrat()} immediately1593after importing pyparsing.15941595Example::1596import pyparsing1597pyparsing.ParserElement.enablePackrat()1598"""1599if not ParserElement._packratEnabled:1600ParserElement._packratEnabled = True1601if cache_size_limit is None:1602ParserElement.packrat_cache = ParserElement._UnboundedCache()1603else:1604ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)1605ParserElement._parse = ParserElement._parseCache16061607def parseString( self, instring, parseAll=False ):1608"""1609Execute the parse expression with the given string.1610This is the main interface to the client code, once the complete1611expression has been built.16121613If you want the grammar to require that the entire input string be1614successfully parsed, then set C{parseAll} to True (equivalent to ending1615the grammar with C{L{StringEnd()}}).16161617Note: C{parseString} implicitly calls C{expandtabs()} on the input string,1618in order to report proper column numbers in parse actions.1619If the input string contains tabs and1620the grammar uses parse actions that use the C{loc} argument to index into the1621string being parsed, you can ensure you have a consistent view of the input1622string by:1623- calling C{parseWithTabs} on your grammar before calling C{parseString}1624(see L{I{parseWithTabs}<parseWithTabs>})1625- define your parse action using the full C{(s,loc,toks)} signature, and1626reference the input string using the parse action's C{s} argument1627- explictly expand the tabs in your input string before calling1628C{parseString}16291630Example::1631Word('a').parseString('aaaaabaaa') # -> ['aaaaa']1632Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text1633"""1634ParserElement.resetCache()1635if not self.streamlined:1636self.streamline()1637#~ self.saveAsList = True1638for e in self.ignoreExprs:1639e.streamline()1640if not self.keepTabs:1641instring = instring.expandtabs()1642try:1643loc, tokens = self._parse( instring, 0 )1644if parseAll:1645loc = self.preParse( instring, loc )1646se = Empty() + StringEnd()1647se._parse( instring, loc )1648except ParseBaseException as exc:1649if ParserElement.verbose_stacktrace:1650raise1651else:1652# catch and re-raise exception from here, clears out pyparsing internal stack trace1653raise exc1654else:1655return tokens16561657def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):1658"""1659Scan the input string for expression matches. Each match will return the1660matching tokens, start location, and end location. May be called with optional1661C{maxMatches} argument, to clip scanning after 'n' matches are found. If1662C{overlap} is specified, then overlapping matches will be reported.16631664Note that the start and end locations are reported relative to the string1665being parsed. See L{I{parseString}<parseString>} for more information on parsing1666strings with embedded tabs.16671668Example::1669source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"1670print(source)1671for tokens,start,end in Word(alphas).scanString(source):1672print(' '*start + '^'*(end-start))1673print(' '*start + tokens[0])16741675prints::16761677sldjf123lsdjjkf345sldkjf879lkjsfd9871678^^^^^1679sldjf1680^^^^^^^1681lsdjjkf1682^^^^^^1683sldkjf1684^^^^^^1685lkjsfd1686"""1687if not self.streamlined:1688self.streamline()1689for e in self.ignoreExprs:1690e.streamline()16911692if not self.keepTabs:1693instring = _ustr(instring).expandtabs()1694instrlen = len(instring)1695loc = 01696preparseFn = self.preParse1697parseFn = self._parse1698ParserElement.resetCache()1699matches = 01700try:1701while loc <= instrlen and matches < maxMatches:1702try:1703preloc = preparseFn( instring, loc )1704nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )1705except ParseException:1706loc = preloc+11707else:1708if nextLoc > loc:1709matches += 11710yield tokens, preloc, nextLoc1711if overlap:1712nextloc = preparseFn( instring, loc )1713if nextloc > loc:1714loc = nextLoc1715else:1716loc += 11717else:1718loc = nextLoc1719else:1720loc = preloc+11721except ParseBaseException as exc:1722if ParserElement.verbose_stacktrace:1723raise1724else:1725# catch and re-raise exception from here, clears out pyparsing internal stack trace1726raise exc17271728def transformString( self, instring ):1729"""1730Extension to C{L{scanString}}, to modify matching text with modified tokens that may1731be returned from a parse action. To use C{transformString}, define a grammar and1732attach a parse action to it that modifies the returned token list.1733Invoking C{transformString()} on a target string will then scan for matches,1734and replace the matched text patterns according to the logic in the parse1735action. C{transformString()} returns the resulting transformed string.17361737Example::1738wd = Word(alphas)1739wd.setParseAction(lambda toks: toks[0].title())17401741print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))1742Prints::1743Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.1744"""1745out = []1746lastE = 01747# force preservation of <TAB>s, to minimize unwanted transformation of string, and to1748# keep string locs straight between transformString and scanString1749self.keepTabs = True1750try:1751for t,s,e in self.scanString( instring ):1752out.append( instring[lastE:s] )1753if t:1754if isinstance(t,ParseResults):1755out += t.asList()1756elif isinstance(t,list):1757out += t1758else:1759out.append(t)1760lastE = e1761out.append(instring[lastE:])1762out = [o for o in out if o]1763return "".join(map(_ustr,_flatten(out)))1764except ParseBaseException as exc:1765if ParserElement.verbose_stacktrace:1766raise1767else:1768# catch and re-raise exception from here, clears out pyparsing internal stack trace1769raise exc17701771def searchString( self, instring, maxMatches=_MAX_INT ):1772"""1773Another extension to C{L{scanString}}, simplifying the access to the tokens found1774to match the given parse expression. May be called with optional1775C{maxMatches} argument, to clip searching after 'n' matches are found.17761777Example::1778# a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters1779cap_word = Word(alphas.upper(), alphas.lower())17801781print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))17821783# the sum() builtin can be used to merge results into a single ParseResults object1784print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))1785prints::1786[['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]1787['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']1788"""1789try:1790return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])1791except ParseBaseException as exc:1792if ParserElement.verbose_stacktrace:1793raise1794else:1795# catch and re-raise exception from here, clears out pyparsing internal stack trace1796raise exc17971798def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):1799"""1800Generator method to split a string using the given expression as a separator.1801May be called with optional C{maxsplit} argument, to limit the number of splits;1802and the optional C{includeSeparators} argument (default=C{False}), if the separating1803matching text should be included in the split results.18041805Example::1806punc = oneOf(list(".,;:/-!?"))1807print(list(punc.split("This, this?, this sentence, is badly punctuated!")))1808prints::1809['This', ' this', '', ' this sentence', ' is badly punctuated', '']1810"""1811splits = 01812last = 01813for t,s,e in self.scanString(instring, maxMatches=maxsplit):1814yield instring[last:s]1815if includeSeparators:1816yield t[0]1817last = e1818yield instring[last:]18191820def __add__(self, other ):1821"""1822Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement1823converts them to L{Literal}s by default.18241825Example::1826greet = Word(alphas) + "," + Word(alphas) + "!"1827hello = "Hello, World!"1828print (hello, "->", greet.parseString(hello))1829Prints::1830Hello, World! -> ['Hello', ',', 'World', '!']1831"""1832if isinstance( other, basestring ):1833other = ParserElement._literalStringClass( other )1834if not isinstance( other, ParserElement ):1835warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),1836SyntaxWarning, stacklevel=2)1837return None1838return And( [ self, other ] )18391840def __radd__(self, other ):1841"""1842Implementation of + operator when left operand is not a C{L{ParserElement}}1843"""1844if isinstance( other, basestring ):1845other = ParserElement._literalStringClass( other )1846if not isinstance( other, ParserElement ):1847warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),1848SyntaxWarning, stacklevel=2)1849return None1850return other + self18511852def __sub__(self, other):1853"""1854Implementation of - operator, returns C{L{And}} with error stop1855"""1856if isinstance( other, basestring ):1857other = ParserElement._literalStringClass( other )1858if not isinstance( other, ParserElement ):1859warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),1860SyntaxWarning, stacklevel=2)1861return None1862return self + And._ErrorStop() + other18631864def __rsub__(self, other ):1865"""1866Implementation of - operator when left operand is not a C{L{ParserElement}}1867"""1868if isinstance( other, basestring ):1869other = ParserElement._literalStringClass( other )1870if not isinstance( other, ParserElement ):1871warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),1872SyntaxWarning, stacklevel=2)1873return None1874return other - self18751876def __mul__(self,other):1877"""1878Implementation of * operator, allows use of C{expr * 3} in place of1879C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer1880tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples1881may also include C{None} as in:1882- C{expr*(n,None)} or C{expr*(n,)} is equivalent1883to C{expr*n + L{ZeroOrMore}(expr)}1884(read as "at least n instances of C{expr}")1885- C{expr*(None,n)} is equivalent to C{expr*(0,n)}1886(read as "0 to n instances of C{expr}")1887- C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}1888- C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}18891890Note that C{expr*(None,n)} does not raise an exception if1891more than n exprs exist in the input stream; that is,1892C{expr*(None,n)} does not enforce a maximum number of expr1893occurrences. If this behavior is desired, then write1894C{expr*(None,n) + ~expr}1895"""1896if isinstance(other,int):1897minElements, optElements = other,01898elif isinstance(other,tuple):1899other = (other + (None, None))[:2]1900if other[0] is None:1901other = (0, other[1])1902if isinstance(other[0],int) and other[1] is None:1903if other[0] == 0:1904return ZeroOrMore(self)1905if other[0] == 1:1906return OneOrMore(self)1907else:1908return self*other[0] + ZeroOrMore(self)1909elif isinstance(other[0],int) and isinstance(other[1],int):1910minElements, optElements = other1911optElements -= minElements1912else:1913raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))1914else:1915raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))19161917if minElements < 0:1918raise ValueError("cannot multiply ParserElement by negative value")1919if optElements < 0:1920raise ValueError("second tuple value must be greater or equal to first tuple value")1921if minElements == optElements == 0:1922raise ValueError("cannot multiply ParserElement by 0 or (0,0)")19231924if (optElements):1925def makeOptionalList(n):1926if n>1:1927return Optional(self + makeOptionalList(n-1))1928else:1929return Optional(self)1930if minElements:1931if minElements == 1:1932ret = self + makeOptionalList(optElements)1933else:1934ret = And([self]*minElements) + makeOptionalList(optElements)1935else:1936ret = makeOptionalList(optElements)1937else:1938if minElements == 1:1939ret = self1940else:1941ret = And([self]*minElements)1942return ret19431944def __rmul__(self, other):1945return self.__mul__(other)19461947def __or__(self, other ):1948"""1949Implementation of | operator - returns C{L{MatchFirst}}1950"""1951if isinstance( other, basestring ):1952other = ParserElement._literalStringClass( other )1953if not isinstance( other, ParserElement ):1954warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),1955SyntaxWarning, stacklevel=2)1956return None1957return MatchFirst( [ self, other ] )19581959def __ror__(self, other ):1960"""1961Implementation of | operator when left operand is not a C{L{ParserElement}}1962"""1963if isinstance( other, basestring ):1964other = ParserElement._literalStringClass( other )1965if not isinstance( other, ParserElement ):1966warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),1967SyntaxWarning, stacklevel=2)1968return None1969return other | self19701971def __xor__(self, other ):1972"""1973Implementation of ^ operator - returns C{L{Or}}1974"""1975if isinstance( other, basestring ):1976other = ParserElement._literalStringClass( other )1977if not isinstance( other, ParserElement ):1978warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),1979SyntaxWarning, stacklevel=2)1980return None1981return Or( [ self, other ] )19821983def __rxor__(self, other ):1984"""1985Implementation of ^ operator when left operand is not a C{L{ParserElement}}1986"""1987if isinstance( other, basestring ):1988other = ParserElement._literalStringClass( other )1989if not isinstance( other, ParserElement ):1990warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),1991SyntaxWarning, stacklevel=2)1992return None1993return other ^ self19941995def __and__(self, other ):1996"""1997Implementation of & operator - returns C{L{Each}}1998"""1999if isinstance( other, basestring ):2000other = ParserElement._literalStringClass( other )2001if not isinstance( other, ParserElement ):2002warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),2003SyntaxWarning, stacklevel=2)2004return None2005return Each( [ self, other ] )20062007def __rand__(self, other ):2008"""2009Implementation of & operator when left operand is not a C{L{ParserElement}}2010"""2011if isinstance( other, basestring ):2012other = ParserElement._literalStringClass( other )2013if not isinstance( other, ParserElement ):2014warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),2015SyntaxWarning, stacklevel=2)2016return None2017return other & self20182019def __invert__( self ):2020"""2021Implementation of ~ operator - returns C{L{NotAny}}2022"""2023return NotAny( self )20242025def __call__(self, name=None):2026"""2027Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.20282029If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be2030passed as C{True}.20312032If C{name} is omitted, same as calling C{L{copy}}.20332034Example::2035# these are equivalent2036userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")2037userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")2038"""2039if name is not None:2040return self.setResultsName(name)2041else:2042return self.copy()20432044def suppress( self ):2045"""2046Suppresses the output of this C{ParserElement}; useful to keep punctuation from2047cluttering up returned output.2048"""2049return Suppress( self )20502051def leaveWhitespace( self ):2052"""2053Disables the skipping of whitespace before matching the characters in the2054C{ParserElement}'s defined pattern. This is normally only used internally by2055the pyparsing module, but may be needed in some whitespace-sensitive grammars.2056"""2057self.skipWhitespace = False2058return self20592060def setWhitespaceChars( self, chars ):2061"""2062Overrides the default whitespace chars2063"""2064self.skipWhitespace = True2065self.whiteChars = chars2066self.copyDefaultWhiteChars = False2067return self20682069def parseWithTabs( self ):2070"""2071Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.2072Must be called before C{parseString} when the input grammar contains elements that2073match C{<TAB>} characters.2074"""2075self.keepTabs = True2076return self20772078def ignore( self, other ):2079"""2080Define expression to be ignored (e.g., comments) while doing pattern2081matching; may be called repeatedly, to define multiple comment or other2082ignorable patterns.20832084Example::2085patt = OneOrMore(Word(alphas))2086patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']20872088patt.ignore(cStyleComment)2089patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']2090"""2091if isinstance(other, basestring):2092other = Suppress(other)20932094if isinstance( other, Suppress ):2095if other not in self.ignoreExprs:2096self.ignoreExprs.append(other)2097else:2098self.ignoreExprs.append( Suppress( other.copy() ) )2099return self21002101def setDebugActions( self, startAction, successAction, exceptionAction ):2102"""2103Enable display of debugging messages while doing pattern matching.2104"""2105self.debugActions = (startAction or _defaultStartDebugAction,2106successAction or _defaultSuccessDebugAction,2107exceptionAction or _defaultExceptionDebugAction)2108self.debug = True2109return self21102111def setDebug( self, flag=True ):2112"""2113Enable display of debugging messages while doing pattern matching.2114Set C{flag} to True to enable, False to disable.21152116Example::2117wd = Word(alphas).setName("alphaword")2118integer = Word(nums).setName("numword")2119term = wd | integer21202121# turn on debugging for wd2122wd.setDebug()21232124OneOrMore(term).parseString("abc 123 xyz 890")21252126prints::2127Match alphaword at loc 0(1,1)2128Matched alphaword -> ['abc']2129Match alphaword at loc 3(1,4)2130Exception raised:Expected alphaword (at char 4), (line:1, col:5)2131Match alphaword at loc 7(1,8)2132Matched alphaword -> ['xyz']2133Match alphaword at loc 11(1,12)2134Exception raised:Expected alphaword (at char 12), (line:1, col:13)2135Match alphaword at loc 15(1,16)2136Exception raised:Expected alphaword (at char 15), (line:1, col:16)21372138The output shown is that produced by the default debug actions - custom debug actions can be2139specified using L{setDebugActions}. Prior to attempting2140to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}2141is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}2142message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,2143which makes debugging and exception messages easier to understand - for instance, the default2144name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.2145"""2146if flag:2147self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )2148else:2149self.debug = False2150return self21512152def __str__( self ):2153return self.name21542155def __repr__( self ):2156return _ustr(self)21572158def streamline( self ):2159self.streamlined = True2160self.strRepr = None2161return self21622163def checkRecursion( self, parseElementList ):2164pass21652166def validate( self, validateTrace=[] ):2167"""2168Check defined expressions for valid structure, check for infinite recursive definitions.2169"""2170self.checkRecursion( [] )21712172def parseFile( self, file_or_filename, parseAll=False ):2173"""2174Execute the parse expression on the given file or filename.2175If a filename is specified (instead of a file object),2176the entire file is opened, read, and closed before parsing.2177"""2178try:2179file_contents = file_or_filename.read()2180except AttributeError:2181with open(file_or_filename, "r") as f:2182file_contents = f.read()2183try:2184return self.parseString(file_contents, parseAll)2185except ParseBaseException as exc:2186if ParserElement.verbose_stacktrace:2187raise2188else:2189# catch and re-raise exception from here, clears out pyparsing internal stack trace2190raise exc21912192def __eq__(self,other):2193if isinstance(other, ParserElement):2194return self is other or vars(self) == vars(other)2195elif isinstance(other, basestring):2196return self.matches(other)2197else:2198return super(ParserElement,self)==other21992200def __ne__(self,other):2201return not (self == other)22022203def __hash__(self):2204return hash(id(self))22052206def __req__(self,other):2207return self == other22082209def __rne__(self,other):2210return not (self == other)22112212def matches(self, testString, parseAll=True):2213"""2214Method for quick testing of a parser against a test string. Good for simple2215inline microtests of sub expressions while building up larger parser.22162217Parameters:2218- testString - to test against this expression for a match2219- parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests22202221Example::2222expr = Word(nums)2223assert expr.matches("100")2224"""2225try:2226self.parseString(_ustr(testString), parseAll=parseAll)2227return True2228except ParseBaseException:2229return False22302231def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):2232"""2233Execute the parse expression on a series of test strings, showing each2234test, the parsed results or where the parse failed. Quick and easy way to2235run a parse expression against a list of sample strings.22362237Parameters:2238- tests - a list of separate test strings, or a multiline string of test strings2239- parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests2240- comment - (default=C{'#'}) - expression for indicating embedded comments in the test2241string; pass None to disable comment filtering2242- fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;2243if False, only dump nested list2244- printResults - (default=C{True}) prints test output to stdout2245- failureTests - (default=C{False}) indicates if these tests are expected to fail parsing22462247Returns: a (success, results) tuple, where success indicates that all tests succeeded2248(or failed if C{failureTests} is True), and the results contain a list of lines of each2249test's output22502251Example::2252number_expr = pyparsing_common.number.copy()22532254result = number_expr.runTests('''2255# unsigned integer22561002257# negative integer2258-1002259# float with scientific notation22606.02e232261# integer with scientific notation22621e-122263''')2264print("Success" if result[0] else "Failed!")22652266result = number_expr.runTests('''2267# stray character2268100Z2269# missing leading digit before '.'2270-.1002271# too many '.'22723.14.1592273''', failureTests=True)2274print("Success" if result[0] else "Failed!")2275prints::2276# unsigned integer22771002278[100]22792280# negative integer2281-1002282[-100]22832284# float with scientific notation22856.02e232286[6.02e+23]22872288# integer with scientific notation22891e-122290[1e-12]22912292Success22932294# stray character2295100Z2296^2297FAIL: Expected end of text (at char 3), (line:1, col:4)22982299# missing leading digit before '.'2300-.1002301^2302FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)23032304# too many '.'23053.14.1592306^2307FAIL: Expected end of text (at char 4), (line:1, col:5)23082309Success23102311Each test string must be on a single line. If you want to test a string that spans multiple2312lines, create a test like this::23132314expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")23152316(Note that this is a raw string literal, you must include the leading 'r'.)2317"""2318if isinstance(tests, basestring):2319tests = list(map(str.strip, tests.rstrip().splitlines()))2320if isinstance(comment, basestring):2321comment = Literal(comment)2322allResults = []2323comments = []2324success = True2325for t in tests:2326if comment is not None and comment.matches(t, False) or comments and not t:2327comments.append(t)2328continue2329if not t:2330continue2331out = ['\n'.join(comments), t]2332comments = []2333try:2334t = t.replace(r'\n','\n')2335result = self.parseString(t, parseAll=parseAll)2336out.append(result.dump(full=fullDump))2337success = success and not failureTests2338except ParseBaseException as pe:2339fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""2340if '\n' in t:2341out.append(line(pe.loc, t))2342out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)2343else:2344out.append(' '*pe.loc + '^' + fatal)2345out.append("FAIL: " + str(pe))2346success = success and failureTests2347result = pe2348except Exception as exc:2349out.append("FAIL-EXCEPTION: " + str(exc))2350success = success and failureTests2351result = exc23522353if printResults:2354if fullDump:2355out.append('')2356print('\n'.join(out))23572358allResults.append((t, result))23592360return success, allResults236123622363class Token(ParserElement):2364"""2365Abstract C{ParserElement} subclass, for defining atomic matching patterns.2366"""2367def __init__( self ):2368super(Token,self).__init__( savelist=False )236923702371class Empty(Token):2372"""2373An empty token, will always match.2374"""2375def __init__( self ):2376super(Empty,self).__init__()2377self.name = "Empty"2378self.mayReturnEmpty = True2379self.mayIndexError = False238023812382class NoMatch(Token):2383"""2384A token that will never match.2385"""2386def __init__( self ):2387super(NoMatch,self).__init__()2388self.name = "NoMatch"2389self.mayReturnEmpty = True2390self.mayIndexError = False2391self.errmsg = "Unmatchable token"23922393def parseImpl( self, instring, loc, doActions=True ):2394raise ParseException(instring, loc, self.errmsg, self)239523962397class Literal(Token):2398"""2399Token to exactly match a specified string.24002401Example::2402Literal('blah').parseString('blah') # -> ['blah']2403Literal('blah').parseString('blahfooblah') # -> ['blah']2404Literal('blah').parseString('bla') # -> Exception: Expected "blah"24052406For case-insensitive matching, use L{CaselessLiteral}.24072408For keyword matching (force word break before and after the matched string),2409use L{Keyword} or L{CaselessKeyword}.2410"""2411def __init__( self, matchString ):2412super(Literal,self).__init__()2413self.match = matchString2414self.matchLen = len(matchString)2415try:2416self.firstMatchChar = matchString[0]2417except IndexError:2418warnings.warn("null string passed to Literal; use Empty() instead",2419SyntaxWarning, stacklevel=2)2420self.__class__ = Empty2421self.name = '"%s"' % _ustr(self.match)2422self.errmsg = "Expected " + self.name2423self.mayReturnEmpty = False2424self.mayIndexError = False24252426# Performance tuning: this routine gets called a *lot*2427# if this is a single character match string and the first character matches,2428# short-circuit as quickly as possible, and avoid calling startswith2429#~ @profile2430def parseImpl( self, instring, loc, doActions=True ):2431if (instring[loc] == self.firstMatchChar and2432(self.matchLen==1 or instring.startswith(self.match,loc)) ):2433return loc+self.matchLen, self.match2434raise ParseException(instring, loc, self.errmsg, self)2435_L = Literal2436ParserElement._literalStringClass = Literal24372438class Keyword(Token):2439"""2440Token to exactly match a specified string as a keyword, that is, it must be2441immediately followed by a non-keyword character. Compare with C{L{Literal}}:2442- C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.2443- C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}2444Accepts two optional constructor arguments in addition to the keyword string:2445- C{identChars} is a string of characters that would be valid identifier characters,2446defaulting to all alphanumerics + "_" and "$"2447- C{caseless} allows case-insensitive matching, default is C{False}.24482449Example::2450Keyword("start").parseString("start") # -> ['start']2451Keyword("start").parseString("starting") # -> Exception24522453For case-insensitive matching, use L{CaselessKeyword}.2454"""2455DEFAULT_KEYWORD_CHARS = alphanums+"_$"24562457def __init__( self, matchString, identChars=None, caseless=False ):2458super(Keyword,self).__init__()2459if identChars is None:2460identChars = Keyword.DEFAULT_KEYWORD_CHARS2461self.match = matchString2462self.matchLen = len(matchString)2463try:2464self.firstMatchChar = matchString[0]2465except IndexError:2466warnings.warn("null string passed to Keyword; use Empty() instead",2467SyntaxWarning, stacklevel=2)2468self.name = '"%s"' % self.match2469self.errmsg = "Expected " + self.name2470self.mayReturnEmpty = False2471self.mayIndexError = False2472self.caseless = caseless2473if caseless:2474self.caselessmatch = matchString.upper()2475identChars = identChars.upper()2476self.identChars = set(identChars)24772478def parseImpl( self, instring, loc, doActions=True ):2479if self.caseless:2480if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and2481(loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and2482(loc == 0 or instring[loc-1].upper() not in self.identChars) ):2483return loc+self.matchLen, self.match2484else:2485if (instring[loc] == self.firstMatchChar and2486(self.matchLen==1 or instring.startswith(self.match,loc)) and2487(loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and2488(loc == 0 or instring[loc-1] not in self.identChars) ):2489return loc+self.matchLen, self.match2490raise ParseException(instring, loc, self.errmsg, self)24912492def copy(self):2493c = super(Keyword,self).copy()2494c.identChars = Keyword.DEFAULT_KEYWORD_CHARS2495return c24962497@staticmethod2498def setDefaultKeywordChars( chars ):2499"""Overrides the default Keyword chars2500"""2501Keyword.DEFAULT_KEYWORD_CHARS = chars25022503class CaselessLiteral(Literal):2504"""2505Token to match a specified string, ignoring case of letters.2506Note: the matched results will always be in the case of the given2507match string, NOT the case of the input text.25082509Example::2510OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']25112512(Contrast with example for L{CaselessKeyword}.)2513"""2514def __init__( self, matchString ):2515super(CaselessLiteral,self).__init__( matchString.upper() )2516# Preserve the defining literal.2517self.returnString = matchString2518self.name = "'%s'" % self.returnString2519self.errmsg = "Expected " + self.name25202521def parseImpl( self, instring, loc, doActions=True ):2522if instring[ loc:loc+self.matchLen ].upper() == self.match:2523return loc+self.matchLen, self.returnString2524raise ParseException(instring, loc, self.errmsg, self)25252526class CaselessKeyword(Keyword):2527"""2528Caseless version of L{Keyword}.25292530Example::2531OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']25322533(Contrast with example for L{CaselessLiteral}.)2534"""2535def __init__( self, matchString, identChars=None ):2536super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )25372538def parseImpl( self, instring, loc, doActions=True ):2539if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and2540(loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):2541return loc+self.matchLen, self.match2542raise ParseException(instring, loc, self.errmsg, self)25432544class CloseMatch(Token):2545"""2546A variation on L{Literal} which matches "close" matches, that is,2547strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:2548- C{match_string} - string to be matched2549- C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match25502551The results from a successful parse will contain the matched text from the input string and the following named results:2552- C{mismatches} - a list of the positions within the match_string where mismatches were found2553- C{original} - the original match_string used to compare against the input string25542555If C{mismatches} is an empty list, then the match was an exact match.25562557Example::2558patt = CloseMatch("ATCATCGAATGGA")2559patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})2560patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)25612562# exact match2563patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})25642565# close match allowing up to 2 mismatches2566patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)2567patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})2568"""2569def __init__(self, match_string, maxMismatches=1):2570super(CloseMatch,self).__init__()2571self.name = match_string2572self.match_string = match_string2573self.maxMismatches = maxMismatches2574self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)2575self.mayIndexError = False2576self.mayReturnEmpty = False25772578def parseImpl( self, instring, loc, doActions=True ):2579start = loc2580instrlen = len(instring)2581maxloc = start + len(self.match_string)25822583if maxloc <= instrlen:2584match_string = self.match_string2585match_stringloc = 02586mismatches = []2587maxMismatches = self.maxMismatches25882589for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)):2590src,mat = s_m2591if src != mat:2592mismatches.append(match_stringloc)2593if len(mismatches) > maxMismatches:2594break2595else:2596loc = match_stringloc + 12597results = ParseResults([instring[start:loc]])2598results['original'] = self.match_string2599results['mismatches'] = mismatches2600return loc, results26012602raise ParseException(instring, loc, self.errmsg, self)260326042605class Word(Token):2606"""2607Token for matching words composed of allowed character sets.2608Defined with string containing all allowed initial characters,2609an optional string containing allowed body characters (if omitted,2610defaults to the initial character set), and an optional minimum,2611maximum, and/or exact length. The default value for C{min} is 1 (a2612minimum value < 1 is not valid); the default values for C{max} and C{exact}2613are 0, meaning no maximum or exact length restriction. An optional2614C{excludeChars} parameter can list characters that might be found in2615the input C{bodyChars} string; useful to define a word of all printables2616except for one or two characters, for instance.26172618L{srange} is useful for defining custom character set strings for defining2619C{Word} expressions, using range notation from regular expression character sets.26202621A common mistake is to use C{Word} to match a specific literal string, as in2622C{Word("Address")}. Remember that C{Word} uses the string argument to define2623I{sets} of matchable characters. This expression would match "Add", "AAA",2624"dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.2625To match an exact literal string, use L{Literal} or L{Keyword}.26262627pyparsing includes helper strings for building Words:2628- L{alphas}2629- L{nums}2630- L{alphanums}2631- L{hexnums}2632- L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)2633- L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)2634- L{printables} (any non-whitespace character)26352636Example::2637# a word composed of digits2638integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))26392640# a word with a leading capital, and zero or more lowercase2641capital_word = Word(alphas.upper(), alphas.lower())26422643# hostnames are alphanumeric, with leading alpha, and '-'2644hostname = Word(alphas, alphanums+'-')26452646# roman numeral (not a strict parser, accepts invalid mix of characters)2647roman = Word("IVXLCDM")26482649# any string of non-whitespace characters, except for ','2650csv_value = Word(printables, excludeChars=",")2651"""2652def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):2653super(Word,self).__init__()2654if excludeChars:2655initChars = ''.join(c for c in initChars if c not in excludeChars)2656if bodyChars:2657bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)2658self.initCharsOrig = initChars2659self.initChars = set(initChars)2660if bodyChars :2661self.bodyCharsOrig = bodyChars2662self.bodyChars = set(bodyChars)2663else:2664self.bodyCharsOrig = initChars2665self.bodyChars = set(initChars)26662667self.maxSpecified = max > 026682669if min < 1:2670raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")26712672self.minLen = min26732674if max > 0:2675self.maxLen = max2676else:2677self.maxLen = _MAX_INT26782679if exact > 0:2680self.maxLen = exact2681self.minLen = exact26822683self.name = _ustr(self)2684self.errmsg = "Expected " + self.name2685self.mayIndexError = False2686self.asKeyword = asKeyword26872688if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):2689if self.bodyCharsOrig == self.initCharsOrig:2690self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)2691elif len(self.initCharsOrig) == 1:2692self.reString = "%s[%s]*" % \2693(re.escape(self.initCharsOrig),2694_escapeRegexRangeChars(self.bodyCharsOrig),)2695else:2696self.reString = "[%s][%s]*" % \2697(_escapeRegexRangeChars(self.initCharsOrig),2698_escapeRegexRangeChars(self.bodyCharsOrig),)2699if self.asKeyword:2700self.reString = r"\b"+self.reString+r"\b"2701try:2702self.re = re.compile( self.reString )2703except Exception:2704self.re = None27052706def parseImpl( self, instring, loc, doActions=True ):2707if self.re:2708result = self.re.match(instring,loc)2709if not result:2710raise ParseException(instring, loc, self.errmsg, self)27112712loc = result.end()2713return loc, result.group()27142715if not(instring[ loc ] in self.initChars):2716raise ParseException(instring, loc, self.errmsg, self)27172718start = loc2719loc += 12720instrlen = len(instring)2721bodychars = self.bodyChars2722maxloc = start + self.maxLen2723maxloc = min( maxloc, instrlen )2724while loc < maxloc and instring[loc] in bodychars:2725loc += 127262727throwException = False2728if loc - start < self.minLen:2729throwException = True2730if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:2731throwException = True2732if self.asKeyword:2733if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):2734throwException = True27352736if throwException:2737raise ParseException(instring, loc, self.errmsg, self)27382739return loc, instring[start:loc]27402741def __str__( self ):2742try:2743return super(Word,self).__str__()2744except Exception:2745pass274627472748if self.strRepr is None:27492750def charsAsStr(s):2751if len(s)>4:2752return s[:4]+"..."2753else:2754return s27552756if ( self.initCharsOrig != self.bodyCharsOrig ):2757self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )2758else:2759self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)27602761return self.strRepr276227632764class Regex(Token):2765r"""2766Token for matching strings that match a given regular expression.2767Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.2768If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as2769named parse results.27702771Example::2772realnum = Regex(r"[+-]?\d+\.\d*")2773date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')2774# ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression2775roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")2776"""2777compiledREtype = type(re.compile("[A-Z]"))2778def __init__( self, pattern, flags=0):2779"""The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""2780super(Regex,self).__init__()27812782if isinstance(pattern, basestring):2783if not pattern:2784warnings.warn("null string passed to Regex; use Empty() instead",2785SyntaxWarning, stacklevel=2)27862787self.pattern = pattern2788self.flags = flags27892790try:2791self.re = re.compile(self.pattern, self.flags)2792self.reString = self.pattern2793except sre_constants.error:2794warnings.warn("invalid pattern (%s) passed to Regex" % pattern,2795SyntaxWarning, stacklevel=2)2796raise27972798elif isinstance(pattern, Regex.compiledREtype):2799self.re = pattern2800self.pattern = \2801self.reString = str(pattern)2802self.flags = flags28032804else:2805raise ValueError("Regex may only be constructed with a string or a compiled RE object")28062807self.name = _ustr(self)2808self.errmsg = "Expected " + self.name2809self.mayIndexError = False2810self.mayReturnEmpty = True28112812def parseImpl( self, instring, loc, doActions=True ):2813result = self.re.match(instring,loc)2814if not result:2815raise ParseException(instring, loc, self.errmsg, self)28162817loc = result.end()2818d = result.groupdict()2819ret = ParseResults(result.group())2820if d:2821for k in d:2822ret[k] = d[k]2823return loc,ret28242825def __str__( self ):2826try:2827return super(Regex,self).__str__()2828except Exception:2829pass28302831if self.strRepr is None:2832self.strRepr = "Re:(%s)" % repr(self.pattern)28332834return self.strRepr283528362837class QuotedString(Token):2838r"""2839Token for matching strings that are delimited by quoting characters.28402841Defined with the following parameters:2842- quoteChar - string of one or more characters defining the quote delimiting string2843- escChar - character to escape quotes, typically backslash (default=C{None})2844- escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})2845- multiline - boolean indicating whether quotes can span multiple lines (default=C{False})2846- unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})2847- endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)2848- convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})28492850Example::2851qs = QuotedString('"')2852print(qs.searchString('lsjdf "This is the quote" sldjf'))2853complex_qs = QuotedString('{{', endQuoteChar='}}')2854print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))2855sql_qs = QuotedString('"', escQuote='""')2856print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))2857prints::2858[['This is the quote']]2859[['This is the "quote"']]2860[['This is the quote with "embedded" quotes']]2861"""2862def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):2863super(QuotedString,self).__init__()28642865# remove white space from quote chars - wont work anyway2866quoteChar = quoteChar.strip()2867if not quoteChar:2868warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)2869raise SyntaxError()28702871if endQuoteChar is None:2872endQuoteChar = quoteChar2873else:2874endQuoteChar = endQuoteChar.strip()2875if not endQuoteChar:2876warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)2877raise SyntaxError()28782879self.quoteChar = quoteChar2880self.quoteCharLen = len(quoteChar)2881self.firstQuoteChar = quoteChar[0]2882self.endQuoteChar = endQuoteChar2883self.endQuoteCharLen = len(endQuoteChar)2884self.escChar = escChar2885self.escQuote = escQuote2886self.unquoteResults = unquoteResults2887self.convertWhitespaceEscapes = convertWhitespaceEscapes28882889if multiline:2890self.flags = re.MULTILINE | re.DOTALL2891self.pattern = r'%s(?:[^%s%s]' % \2892( re.escape(self.quoteChar),2893_escapeRegexRangeChars(self.endQuoteChar[0]),2894(escChar is not None and _escapeRegexRangeChars(escChar) or '') )2895else:2896self.flags = 02897self.pattern = r'%s(?:[^%s\n\r%s]' % \2898( re.escape(self.quoteChar),2899_escapeRegexRangeChars(self.endQuoteChar[0]),2900(escChar is not None and _escapeRegexRangeChars(escChar) or '') )2901if len(self.endQuoteChar) > 1:2902self.pattern += (2903'|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),2904_escapeRegexRangeChars(self.endQuoteChar[i]))2905for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'2906)2907if escQuote:2908self.pattern += (r'|(?:%s)' % re.escape(escQuote))2909if escChar:2910self.pattern += (r'|(?:%s.)' % re.escape(escChar))2911self.escCharReplacePattern = re.escape(self.escChar)+"(.)"2912self.pattern += (r')*%s' % re.escape(self.endQuoteChar))29132914try:2915self.re = re.compile(self.pattern, self.flags)2916self.reString = self.pattern2917except sre_constants.error:2918warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,2919SyntaxWarning, stacklevel=2)2920raise29212922self.name = _ustr(self)2923self.errmsg = "Expected " + self.name2924self.mayIndexError = False2925self.mayReturnEmpty = True29262927def parseImpl( self, instring, loc, doActions=True ):2928result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None2929if not result:2930raise ParseException(instring, loc, self.errmsg, self)29312932loc = result.end()2933ret = result.group()29342935if self.unquoteResults:29362937# strip off quotes2938ret = ret[self.quoteCharLen:-self.endQuoteCharLen]29392940if isinstance(ret,basestring):2941# replace escaped whitespace2942if '\\' in ret and self.convertWhitespaceEscapes:2943ws_map = {2944r'\t' : '\t',2945r'\n' : '\n',2946r'\f' : '\f',2947r'\r' : '\r',2948}2949for wslit,wschar in ws_map.items():2950ret = ret.replace(wslit, wschar)29512952# replace escaped characters2953if self.escChar:2954ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)29552956# replace escaped quotes2957if self.escQuote:2958ret = ret.replace(self.escQuote, self.endQuoteChar)29592960return loc, ret29612962def __str__( self ):2963try:2964return super(QuotedString,self).__str__()2965except Exception:2966pass29672968if self.strRepr is None:2969self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)29702971return self.strRepr297229732974class CharsNotIn(Token):2975"""2976Token for matching words composed of characters I{not} in a given set (will2977include whitespace in matched characters if not listed in the provided exclusion set - see example).2978Defined with string containing all disallowed characters, and an optional2979minimum, maximum, and/or exact length. The default value for C{min} is 1 (a2980minimum value < 1 is not valid); the default values for C{max} and C{exact}2981are 0, meaning no maximum or exact length restriction.29822983Example::2984# define a comma-separated-value as anything that is not a ','2985csv_value = CharsNotIn(',')2986print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))2987prints::2988['dkls', 'lsdkjf', 's12 34', '@!#', '213']2989"""2990def __init__( self, notChars, min=1, max=0, exact=0 ):2991super(CharsNotIn,self).__init__()2992self.skipWhitespace = False2993self.notChars = notChars29942995if min < 1:2996raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")29972998self.minLen = min29993000if max > 0:3001self.maxLen = max3002else:3003self.maxLen = _MAX_INT30043005if exact > 0:3006self.maxLen = exact3007self.minLen = exact30083009self.name = _ustr(self)3010self.errmsg = "Expected " + self.name3011self.mayReturnEmpty = ( self.minLen == 0 )3012self.mayIndexError = False30133014def parseImpl( self, instring, loc, doActions=True ):3015if instring[loc] in self.notChars:3016raise ParseException(instring, loc, self.errmsg, self)30173018start = loc3019loc += 13020notchars = self.notChars3021maxlen = min( start+self.maxLen, len(instring) )3022while loc < maxlen and \3023(instring[loc] not in notchars):3024loc += 130253026if loc - start < self.minLen:3027raise ParseException(instring, loc, self.errmsg, self)30283029return loc, instring[start:loc]30303031def __str__( self ):3032try:3033return super(CharsNotIn, self).__str__()3034except Exception:3035pass30363037if self.strRepr is None:3038if len(self.notChars) > 4:3039self.strRepr = "!W:(%s...)" % self.notChars[:4]3040else:3041self.strRepr = "!W:(%s)" % self.notChars30423043return self.strRepr30443045class White(Token):3046"""3047Special matching class for matching whitespace. Normally, whitespace is ignored3048by pyparsing grammars. This class is included when some whitespace structures3049are significant. Define with a string containing the whitespace characters to be3050matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,3051as defined for the C{L{Word}} class.3052"""3053whiteStrs = {3054" " : "<SPC>",3055"\t": "<TAB>",3056"\n": "<LF>",3057"\r": "<CR>",3058"\f": "<FF>",3059}3060def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):3061super(White,self).__init__()3062self.matchWhite = ws3063self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )3064#~ self.leaveWhitespace()3065self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))3066self.mayReturnEmpty = True3067self.errmsg = "Expected " + self.name30683069self.minLen = min30703071if max > 0:3072self.maxLen = max3073else:3074self.maxLen = _MAX_INT30753076if exact > 0:3077self.maxLen = exact3078self.minLen = exact30793080def parseImpl( self, instring, loc, doActions=True ):3081if not(instring[ loc ] in self.matchWhite):3082raise ParseException(instring, loc, self.errmsg, self)3083start = loc3084loc += 13085maxloc = start + self.maxLen3086maxloc = min( maxloc, len(instring) )3087while loc < maxloc and instring[loc] in self.matchWhite:3088loc += 130893090if loc - start < self.minLen:3091raise ParseException(instring, loc, self.errmsg, self)30923093return loc, instring[start:loc]309430953096class _PositionToken(Token):3097def __init__( self ):3098super(_PositionToken,self).__init__()3099self.name=self.__class__.__name__3100self.mayReturnEmpty = True3101self.mayIndexError = False31023103class GoToColumn(_PositionToken):3104"""3105Token to advance to a specific column of input text; useful for tabular report scraping.3106"""3107def __init__( self, colno ):3108super(GoToColumn,self).__init__()3109self.col = colno31103111def preParse( self, instring, loc ):3112if col(loc,instring) != self.col:3113instrlen = len(instring)3114if self.ignoreExprs:3115loc = self._skipIgnorables( instring, loc )3116while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :3117loc += 13118return loc31193120def parseImpl( self, instring, loc, doActions=True ):3121thiscol = col( loc, instring )3122if thiscol > self.col:3123raise ParseException( instring, loc, "Text not in expected column", self )3124newloc = loc + self.col - thiscol3125ret = instring[ loc: newloc ]3126return newloc, ret312731283129class LineStart(_PositionToken):3130"""3131Matches if current position is at the beginning of a line within the parse string31323133Example::31343135test = '''\3136AAA this line3137AAA and this line3138AAA but not this one3139B AAA and definitely not this one3140'''31413142for t in (LineStart() + 'AAA' + restOfLine).searchString(test):3143print(t)31443145Prints::3146['AAA', ' this line']3147['AAA', ' and this line']31483149"""3150def __init__( self ):3151super(LineStart,self).__init__()3152self.errmsg = "Expected start of line"31533154def parseImpl( self, instring, loc, doActions=True ):3155if col(loc, instring) == 1:3156return loc, []3157raise ParseException(instring, loc, self.errmsg, self)31583159class LineEnd(_PositionToken):3160"""3161Matches if current position is at the end of a line within the parse string3162"""3163def __init__( self ):3164super(LineEnd,self).__init__()3165self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )3166self.errmsg = "Expected end of line"31673168def parseImpl( self, instring, loc, doActions=True ):3169if loc<len(instring):3170if instring[loc] == "\n":3171return loc+1, "\n"3172else:3173raise ParseException(instring, loc, self.errmsg, self)3174elif loc == len(instring):3175return loc+1, []3176else:3177raise ParseException(instring, loc, self.errmsg, self)31783179class StringStart(_PositionToken):3180"""3181Matches if current position is at the beginning of the parse string3182"""3183def __init__( self ):3184super(StringStart,self).__init__()3185self.errmsg = "Expected start of text"31863187def parseImpl( self, instring, loc, doActions=True ):3188if loc != 0:3189# see if entire string up to here is just whitespace and ignoreables3190if loc != self.preParse( instring, 0 ):3191raise ParseException(instring, loc, self.errmsg, self)3192return loc, []31933194class StringEnd(_PositionToken):3195"""3196Matches if current position is at the end of the parse string3197"""3198def __init__( self ):3199super(StringEnd,self).__init__()3200self.errmsg = "Expected end of text"32013202def parseImpl( self, instring, loc, doActions=True ):3203if loc < len(instring):3204raise ParseException(instring, loc, self.errmsg, self)3205elif loc == len(instring):3206return loc+1, []3207elif loc > len(instring):3208return loc, []3209else:3210raise ParseException(instring, loc, self.errmsg, self)32113212class WordStart(_PositionToken):3213"""3214Matches if the current position is at the beginning of a Word, and3215is not preceded by any character in a given set of C{wordChars}3216(default=C{printables}). To emulate the C{\b} behavior of regular expressions,3217use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of3218the string being parsed, or at the beginning of a line.3219"""3220def __init__(self, wordChars = printables):3221super(WordStart,self).__init__()3222self.wordChars = set(wordChars)3223self.errmsg = "Not at the start of a word"32243225def parseImpl(self, instring, loc, doActions=True ):3226if loc != 0:3227if (instring[loc-1] in self.wordChars or3228instring[loc] not in self.wordChars):3229raise ParseException(instring, loc, self.errmsg, self)3230return loc, []32313232class WordEnd(_PositionToken):3233"""3234Matches if the current position is at the end of a Word, and3235is not followed by any character in a given set of C{wordChars}3236(default=C{printables}). To emulate the C{\b} behavior of regular expressions,3237use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of3238the string being parsed, or at the end of a line.3239"""3240def __init__(self, wordChars = printables):3241super(WordEnd,self).__init__()3242self.wordChars = set(wordChars)3243self.skipWhitespace = False3244self.errmsg = "Not at the end of a word"32453246def parseImpl(self, instring, loc, doActions=True ):3247instrlen = len(instring)3248if instrlen>0 and loc<instrlen:3249if (instring[loc] in self.wordChars or3250instring[loc-1] not in self.wordChars):3251raise ParseException(instring, loc, self.errmsg, self)3252return loc, []325332543255class ParseExpression(ParserElement):3256"""3257Abstract subclass of ParserElement, for combining and post-processing parsed tokens.3258"""3259def __init__( self, exprs, savelist = False ):3260super(ParseExpression,self).__init__(savelist)3261if isinstance( exprs, _generatorType ):3262exprs = list(exprs)32633264if isinstance( exprs, basestring ):3265self.exprs = [ ParserElement._literalStringClass( exprs ) ]3266elif isinstance( exprs, Iterable ):3267exprs = list(exprs)3268# if sequence of strings provided, wrap with Literal3269if all(isinstance(expr, basestring) for expr in exprs):3270exprs = map(ParserElement._literalStringClass, exprs)3271self.exprs = list(exprs)3272else:3273try:3274self.exprs = list( exprs )3275except TypeError:3276self.exprs = [ exprs ]3277self.callPreparse = False32783279def __getitem__( self, i ):3280return self.exprs[i]32813282def append( self, other ):3283self.exprs.append( other )3284self.strRepr = None3285return self32863287def leaveWhitespace( self ):3288"""Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on3289all contained expressions."""3290self.skipWhitespace = False3291self.exprs = [ e.copy() for e in self.exprs ]3292for e in self.exprs:3293e.leaveWhitespace()3294return self32953296def ignore( self, other ):3297if isinstance( other, Suppress ):3298if other not in self.ignoreExprs:3299super( ParseExpression, self).ignore( other )3300for e in self.exprs:3301e.ignore( self.ignoreExprs[-1] )3302else:3303super( ParseExpression, self).ignore( other )3304for e in self.exprs:3305e.ignore( self.ignoreExprs[-1] )3306return self33073308def __str__( self ):3309try:3310return super(ParseExpression,self).__str__()3311except Exception:3312pass33133314if self.strRepr is None:3315self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )3316return self.strRepr33173318def streamline( self ):3319super(ParseExpression,self).streamline()33203321for e in self.exprs:3322e.streamline()33233324# collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )3325# but only if there are no parse actions or resultsNames on the nested And's3326# (likewise for Or's and MatchFirst's)3327if ( len(self.exprs) == 2 ):3328other = self.exprs[0]3329if ( isinstance( other, self.__class__ ) and3330not(other.parseAction) and3331other.resultsName is None and3332not other.debug ):3333self.exprs = other.exprs[:] + [ self.exprs[1] ]3334self.strRepr = None3335self.mayReturnEmpty |= other.mayReturnEmpty3336self.mayIndexError |= other.mayIndexError33373338other = self.exprs[-1]3339if ( isinstance( other, self.__class__ ) and3340not(other.parseAction) and3341other.resultsName is None and3342not other.debug ):3343self.exprs = self.exprs[:-1] + other.exprs[:]3344self.strRepr = None3345self.mayReturnEmpty |= other.mayReturnEmpty3346self.mayIndexError |= other.mayIndexError33473348self.errmsg = "Expected " + _ustr(self)33493350return self33513352def setResultsName( self, name, listAllMatches=False ):3353ret = super(ParseExpression,self).setResultsName(name,listAllMatches)3354return ret33553356def validate( self, validateTrace=[] ):3357tmp = validateTrace[:]+[self]3358for e in self.exprs:3359e.validate(tmp)3360self.checkRecursion( [] )33613362def copy(self):3363ret = super(ParseExpression,self).copy()3364ret.exprs = [e.copy() for e in self.exprs]3365return ret33663367class And(ParseExpression):3368"""3369Requires all given C{ParseExpression}s to be found in the given order.3370Expressions may be separated by whitespace.3371May be constructed using the C{'+'} operator.3372May also be constructed using the C{'-'} operator, which will suppress backtracking.33733374Example::3375integer = Word(nums)3376name_expr = OneOrMore(Word(alphas))33773378expr = And([integer("id"),name_expr("name"),integer("age")])3379# more easily written as:3380expr = integer("id") + name_expr("name") + integer("age")3381"""33823383class _ErrorStop(Empty):3384def __init__(self, *args, **kwargs):3385super(And._ErrorStop,self).__init__(*args, **kwargs)3386self.name = '-'3387self.leaveWhitespace()33883389def __init__( self, exprs, savelist = True ):3390super(And,self).__init__(exprs, savelist)3391self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)3392self.setWhitespaceChars( self.exprs[0].whiteChars )3393self.skipWhitespace = self.exprs[0].skipWhitespace3394self.callPreparse = True33953396def parseImpl( self, instring, loc, doActions=True ):3397# pass False as last arg to _parse for first element, since we already3398# pre-parsed the string as part of our And pre-parsing3399loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )3400errorStop = False3401for e in self.exprs[1:]:3402if isinstance(e, And._ErrorStop):3403errorStop = True3404continue3405if errorStop:3406try:3407loc, exprtokens = e._parse( instring, loc, doActions )3408except ParseSyntaxException:3409raise3410except ParseBaseException as pe:3411pe.__traceback__ = None3412raise ParseSyntaxException._from_exception(pe)3413except IndexError:3414raise ParseSyntaxException(instring, len(instring), self.errmsg, self)3415else:3416loc, exprtokens = e._parse( instring, loc, doActions )3417if exprtokens or exprtokens.haskeys():3418resultlist += exprtokens3419return loc, resultlist34203421def __iadd__(self, other ):3422if isinstance( other, basestring ):3423other = ParserElement._literalStringClass( other )3424return self.append( other ) #And( [ self, other ] )34253426def checkRecursion( self, parseElementList ):3427subRecCheckList = parseElementList[:] + [ self ]3428for e in self.exprs:3429e.checkRecursion( subRecCheckList )3430if not e.mayReturnEmpty:3431break34323433def __str__( self ):3434if hasattr(self,"name"):3435return self.name34363437if self.strRepr is None:3438self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"34393440return self.strRepr344134423443class Or(ParseExpression):3444"""3445Requires that at least one C{ParseExpression} is found.3446If two expressions match, the expression that matches the longest string will be used.3447May be constructed using the C{'^'} operator.34483449Example::3450# construct Or using '^' operator34513452number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))3453print(number.searchString("123 3.1416 789"))3454prints::3455[['123'], ['3.1416'], ['789']]3456"""3457def __init__( self, exprs, savelist = False ):3458super(Or,self).__init__(exprs, savelist)3459if self.exprs:3460self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)3461else:3462self.mayReturnEmpty = True34633464def parseImpl( self, instring, loc, doActions=True ):3465maxExcLoc = -13466maxException = None3467matches = []3468for e in self.exprs:3469try:3470loc2 = e.tryParse( instring, loc )3471except ParseException as err:3472err.__traceback__ = None3473if err.loc > maxExcLoc:3474maxException = err3475maxExcLoc = err.loc3476except IndexError:3477if len(instring) > maxExcLoc:3478maxException = ParseException(instring,len(instring),e.errmsg,self)3479maxExcLoc = len(instring)3480else:3481# save match among all matches, to retry longest to shortest3482matches.append((loc2, e))34833484if matches:3485matches.sort(key=lambda x: -x[0])3486for _,e in matches:3487try:3488return e._parse( instring, loc, doActions )3489except ParseException as err:3490err.__traceback__ = None3491if err.loc > maxExcLoc:3492maxException = err3493maxExcLoc = err.loc34943495if maxException is not None:3496maxException.msg = self.errmsg3497raise maxException3498else:3499raise ParseException(instring, loc, "no defined alternatives to match", self)350035013502def __ixor__(self, other ):3503if isinstance( other, basestring ):3504other = ParserElement._literalStringClass( other )3505return self.append( other ) #Or( [ self, other ] )35063507def __str__( self ):3508if hasattr(self,"name"):3509return self.name35103511if self.strRepr is None:3512self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"35133514return self.strRepr35153516def checkRecursion( self, parseElementList ):3517subRecCheckList = parseElementList[:] + [ self ]3518for e in self.exprs:3519e.checkRecursion( subRecCheckList )352035213522class MatchFirst(ParseExpression):3523"""3524Requires that at least one C{ParseExpression} is found.3525If two expressions match, the first one listed is the one that will match.3526May be constructed using the C{'|'} operator.35273528Example::3529# construct MatchFirst using '|' operator35303531# watch the order of expressions to match3532number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))3533print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]35343535# put more selective expression first3536number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)3537print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]3538"""3539def __init__( self, exprs, savelist = False ):3540super(MatchFirst,self).__init__(exprs, savelist)3541if self.exprs:3542self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)3543else:3544self.mayReturnEmpty = True35453546def parseImpl( self, instring, loc, doActions=True ):3547maxExcLoc = -13548maxException = None3549for e in self.exprs:3550try:3551ret = e._parse( instring, loc, doActions )3552return ret3553except ParseException as err:3554if err.loc > maxExcLoc:3555maxException = err3556maxExcLoc = err.loc3557except IndexError:3558if len(instring) > maxExcLoc:3559maxException = ParseException(instring,len(instring),e.errmsg,self)3560maxExcLoc = len(instring)35613562# only got here if no expression matched, raise exception for match that made it the furthest3563else:3564if maxException is not None:3565maxException.msg = self.errmsg3566raise maxException3567else:3568raise ParseException(instring, loc, "no defined alternatives to match", self)35693570def __ior__(self, other ):3571if isinstance( other, basestring ):3572other = ParserElement._literalStringClass( other )3573return self.append( other ) #MatchFirst( [ self, other ] )35743575def __str__( self ):3576if hasattr(self,"name"):3577return self.name35783579if self.strRepr is None:3580self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"35813582return self.strRepr35833584def checkRecursion( self, parseElementList ):3585subRecCheckList = parseElementList[:] + [ self ]3586for e in self.exprs:3587e.checkRecursion( subRecCheckList )358835893590class Each(ParseExpression):3591"""3592Requires all given C{ParseExpression}s to be found, but in any order.3593Expressions may be separated by whitespace.3594May be constructed using the C{'&'} operator.35953596Example::3597color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")3598shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")3599integer = Word(nums)3600shape_attr = "shape:" + shape_type("shape")3601posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")3602color_attr = "color:" + color("color")3603size_attr = "size:" + integer("size")36043605# use Each (using operator '&') to accept attributes in any order3606# (shape and posn are required, color and size are optional)3607shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)36083609shape_spec.runTests('''3610shape: SQUARE color: BLACK posn: 100, 1203611shape: CIRCLE size: 50 color: BLUE posn: 50,803612color:GREEN size:20 shape:TRIANGLE posn:20,403613'''3614)3615prints::3616shape: SQUARE color: BLACK posn: 100, 1203617['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]3618- color: BLACK3619- posn: ['100', ',', '120']3620- x: 1003621- y: 1203622- shape: SQUARE362336243625shape: CIRCLE size: 50 color: BLUE posn: 50,803626['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]3627- color: BLUE3628- posn: ['50', ',', '80']3629- x: 503630- y: 803631- shape: CIRCLE3632- size: 50363336343635color: GREEN size: 20 shape: TRIANGLE posn: 20,403636['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]3637- color: GREEN3638- posn: ['20', ',', '40']3639- x: 203640- y: 403641- shape: TRIANGLE3642- size: 203643"""3644def __init__( self, exprs, savelist = True ):3645super(Each,self).__init__(exprs, savelist)3646self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)3647self.skipWhitespace = True3648self.initExprGroups = True36493650def parseImpl( self, instring, loc, doActions=True ):3651if self.initExprGroups:3652self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))3653opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]3654opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]3655self.optionals = opt1 + opt23656self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]3657self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]3658self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]3659self.required += self.multirequired3660self.initExprGroups = False3661tmpLoc = loc3662tmpReqd = self.required[:]3663tmpOpt = self.optionals[:]3664matchOrder = []36653666keepMatching = True3667while keepMatching:3668tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired3669failed = []3670for e in tmpExprs:3671try:3672tmpLoc = e.tryParse( instring, tmpLoc )3673except ParseException:3674failed.append(e)3675else:3676matchOrder.append(self.opt1map.get(id(e),e))3677if e in tmpReqd:3678tmpReqd.remove(e)3679elif e in tmpOpt:3680tmpOpt.remove(e)3681if len(failed) == len(tmpExprs):3682keepMatching = False36833684if tmpReqd:3685missing = ", ".join(_ustr(e) for e in tmpReqd)3686raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )36873688# add any unmatched Optionals, in case they have default values defined3689matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]36903691resultlist = []3692for e in matchOrder:3693loc,results = e._parse(instring,loc,doActions)3694resultlist.append(results)36953696finalResults = sum(resultlist, ParseResults([]))3697return loc, finalResults36983699def __str__( self ):3700if hasattr(self,"name"):3701return self.name37023703if self.strRepr is None:3704self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"37053706return self.strRepr37073708def checkRecursion( self, parseElementList ):3709subRecCheckList = parseElementList[:] + [ self ]3710for e in self.exprs:3711e.checkRecursion( subRecCheckList )371237133714class ParseElementEnhance(ParserElement):3715"""3716Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.3717"""3718def __init__( self, expr, savelist=False ):3719super(ParseElementEnhance,self).__init__(savelist)3720if isinstance( expr, basestring ):3721if issubclass(ParserElement._literalStringClass, Token):3722expr = ParserElement._literalStringClass(expr)3723else:3724expr = ParserElement._literalStringClass(Literal(expr))3725self.expr = expr3726self.strRepr = None3727if expr is not None:3728self.mayIndexError = expr.mayIndexError3729self.mayReturnEmpty = expr.mayReturnEmpty3730self.setWhitespaceChars( expr.whiteChars )3731self.skipWhitespace = expr.skipWhitespace3732self.saveAsList = expr.saveAsList3733self.callPreparse = expr.callPreparse3734self.ignoreExprs.extend(expr.ignoreExprs)37353736def parseImpl( self, instring, loc, doActions=True ):3737if self.expr is not None:3738return self.expr._parse( instring, loc, doActions, callPreParse=False )3739else:3740raise ParseException("",loc,self.errmsg,self)37413742def leaveWhitespace( self ):3743self.skipWhitespace = False3744self.expr = self.expr.copy()3745if self.expr is not None:3746self.expr.leaveWhitespace()3747return self37483749def ignore( self, other ):3750if isinstance( other, Suppress ):3751if other not in self.ignoreExprs:3752super( ParseElementEnhance, self).ignore( other )3753if self.expr is not None:3754self.expr.ignore( self.ignoreExprs[-1] )3755else:3756super( ParseElementEnhance, self).ignore( other )3757if self.expr is not None:3758self.expr.ignore( self.ignoreExprs[-1] )3759return self37603761def streamline( self ):3762super(ParseElementEnhance,self).streamline()3763if self.expr is not None:3764self.expr.streamline()3765return self37663767def checkRecursion( self, parseElementList ):3768if self in parseElementList:3769raise RecursiveGrammarException( parseElementList+[self] )3770subRecCheckList = parseElementList[:] + [ self ]3771if self.expr is not None:3772self.expr.checkRecursion( subRecCheckList )37733774def validate( self, validateTrace=[] ):3775tmp = validateTrace[:]+[self]3776if self.expr is not None:3777self.expr.validate(tmp)3778self.checkRecursion( [] )37793780def __str__( self ):3781try:3782return super(ParseElementEnhance,self).__str__()3783except Exception:3784pass37853786if self.strRepr is None and self.expr is not None:3787self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )3788return self.strRepr378937903791class FollowedBy(ParseElementEnhance):3792"""3793Lookahead matching of the given parse expression. C{FollowedBy}3794does I{not} advance the parsing position within the input string, it only3795verifies that the specified parse expression matches at the current3796position. C{FollowedBy} always returns a null token list.37973798Example::3799# use FollowedBy to match a label only if it is followed by a ':'3800data_word = Word(alphas)3801label = data_word + FollowedBy(':')3802attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))38033804OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()3805prints::3806[['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]3807"""3808def __init__( self, expr ):3809super(FollowedBy,self).__init__(expr)3810self.mayReturnEmpty = True38113812def parseImpl( self, instring, loc, doActions=True ):3813self.expr.tryParse( instring, loc )3814return loc, []381538163817class NotAny(ParseElementEnhance):3818"""3819Lookahead to disallow matching with the given parse expression. C{NotAny}3820does I{not} advance the parsing position within the input string, it only3821verifies that the specified parse expression does I{not} match at the current3822position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}3823always returns a null token list. May be constructed using the '~' operator.38243825Example::38263827"""3828def __init__( self, expr ):3829super(NotAny,self).__init__(expr)3830#~ self.leaveWhitespace()3831self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs3832self.mayReturnEmpty = True3833self.errmsg = "Found unwanted token, "+_ustr(self.expr)38343835def parseImpl( self, instring, loc, doActions=True ):3836if self.expr.canParseNext(instring, loc):3837raise ParseException(instring, loc, self.errmsg, self)3838return loc, []38393840def __str__( self ):3841if hasattr(self,"name"):3842return self.name38433844if self.strRepr is None:3845self.strRepr = "~{" + _ustr(self.expr) + "}"38463847return self.strRepr38483849class _MultipleMatch(ParseElementEnhance):3850def __init__( self, expr, stopOn=None):3851super(_MultipleMatch, self).__init__(expr)3852self.saveAsList = True3853ender = stopOn3854if isinstance(ender, basestring):3855ender = ParserElement._literalStringClass(ender)3856self.not_ender = ~ender if ender is not None else None38573858def parseImpl( self, instring, loc, doActions=True ):3859self_expr_parse = self.expr._parse3860self_skip_ignorables = self._skipIgnorables3861check_ender = self.not_ender is not None3862if check_ender:3863try_not_ender = self.not_ender.tryParse38643865# must be at least one (but first see if we are the stopOn sentinel;3866# if so, fail)3867if check_ender:3868try_not_ender(instring, loc)3869loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )3870try:3871hasIgnoreExprs = (not not self.ignoreExprs)3872while 1:3873if check_ender:3874try_not_ender(instring, loc)3875if hasIgnoreExprs:3876preloc = self_skip_ignorables( instring, loc )3877else:3878preloc = loc3879loc, tmptokens = self_expr_parse( instring, preloc, doActions )3880if tmptokens or tmptokens.haskeys():3881tokens += tmptokens3882except (ParseException,IndexError):3883pass38843885return loc, tokens38863887class OneOrMore(_MultipleMatch):3888"""3889Repetition of one or more of the given expression.38903891Parameters:3892- expr - expression that must match one or more times3893- stopOn - (default=C{None}) - expression for a terminating sentinel3894(only required if the sentinel would ordinarily match the repetition3895expression)38963897Example::3898data_word = Word(alphas)3899label = data_word + FollowedBy(':')3900attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))39013902text = "shape: SQUARE posn: upper left color: BLACK"3903OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]39043905# use stopOn attribute for OneOrMore to avoid reading label string as part of the data3906attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))3907OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]39083909# could also be written as3910(attr_expr * (1,)).parseString(text).pprint()3911"""39123913def __str__( self ):3914if hasattr(self,"name"):3915return self.name39163917if self.strRepr is None:3918self.strRepr = "{" + _ustr(self.expr) + "}..."39193920return self.strRepr39213922class ZeroOrMore(_MultipleMatch):3923"""3924Optional repetition of zero or more of the given expression.39253926Parameters:3927- expr - expression that must match zero or more times3928- stopOn - (default=C{None}) - expression for a terminating sentinel3929(only required if the sentinel would ordinarily match the repetition3930expression)39313932Example: similar to L{OneOrMore}3933"""3934def __init__( self, expr, stopOn=None):3935super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)3936self.mayReturnEmpty = True39373938def parseImpl( self, instring, loc, doActions=True ):3939try:3940return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)3941except (ParseException,IndexError):3942return loc, []39433944def __str__( self ):3945if hasattr(self,"name"):3946return self.name39473948if self.strRepr is None:3949self.strRepr = "[" + _ustr(self.expr) + "]..."39503951return self.strRepr39523953class _NullToken(object):3954def __bool__(self):3955return False3956__nonzero__ = __bool__3957def __str__(self):3958return ""39593960_optionalNotMatched = _NullToken()3961class Optional(ParseElementEnhance):3962"""3963Optional matching of the given expression.39643965Parameters:3966- expr - expression that must match zero or more times3967- default (optional) - value to be returned if the optional expression is not found.39683969Example::3970# US postal code can be a 5-digit zip, plus optional 4-digit qualifier3971zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))3972zip.runTests('''3973# traditional ZIP code39741234539753976# ZIP+4 form397712101-000139783979# invalid ZIP398098765-3981''')3982prints::3983# traditional ZIP code3984123453985['12345']39863987# ZIP+4 form398812101-00013989['12101-0001']39903991# invalid ZIP399298765-3993^3994FAIL: Expected end of text (at char 5), (line:1, col:6)3995"""3996def __init__( self, expr, default=_optionalNotMatched ):3997super(Optional,self).__init__( expr, savelist=False )3998self.saveAsList = self.expr.saveAsList3999self.defaultValue = default4000self.mayReturnEmpty = True40014002def parseImpl( self, instring, loc, doActions=True ):4003try:4004loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )4005except (ParseException,IndexError):4006if self.defaultValue is not _optionalNotMatched:4007if self.expr.resultsName:4008tokens = ParseResults([ self.defaultValue ])4009tokens[self.expr.resultsName] = self.defaultValue4010else:4011tokens = [ self.defaultValue ]4012else:4013tokens = []4014return loc, tokens40154016def __str__( self ):4017if hasattr(self,"name"):4018return self.name40194020if self.strRepr is None:4021self.strRepr = "[" + _ustr(self.expr) + "]"40224023return self.strRepr40244025class SkipTo(ParseElementEnhance):4026"""4027Token for skipping over all undefined text until the matched expression is found.40284029Parameters:4030- expr - target expression marking the end of the data to be skipped4031- include - (default=C{False}) if True, the target expression is also parsed4032(the skipped text and target expression are returned as a 2-element list).4033- ignore - (default=C{None}) used to define grammars (typically quoted strings and4034comments) that might contain false matches to the target expression4035- failOn - (default=C{None}) define expressions that are not allowed to be4036included in the skipped test; if found before the target expression is found,4037the SkipTo is not a match40384039Example::4040report = '''4041Outstanding Issues Report - 1 Jan 200040424043# | Severity | Description | Days Open4044-----+----------+-------------------------------------------+-----------4045101 | Critical | Intermittent system crash | 6404694 | Cosmetic | Spelling error on Login ('log|n') | 14404779 | Minor | System slow when running too many reports | 474048'''4049integer = Word(nums)4050SEP = Suppress('|')4051# use SkipTo to simply match everything up until the next SEP4052# - ignore quoted strings, so that a '|' character inside a quoted string does not match4053# - parse action will call token.strip() for each matched token, i.e., the description body4054string_data = SkipTo(SEP, ignore=quotedString)4055string_data.setParseAction(tokenMap(str.strip))4056ticket_expr = (integer("issue_num") + SEP4057+ string_data("sev") + SEP4058+ string_data("desc") + SEP4059+ integer("days_open"))40604061for tkt in ticket_expr.searchString(report):4062print tkt.dump()4063prints::4064['101', 'Critical', 'Intermittent system crash', '6']4065- days_open: 64066- desc: Intermittent system crash4067- issue_num: 1014068- sev: Critical4069['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']4070- days_open: 144071- desc: Spelling error on Login ('log|n')4072- issue_num: 944073- sev: Cosmetic4074['79', 'Minor', 'System slow when running too many reports', '47']4075- days_open: 474076- desc: System slow when running too many reports4077- issue_num: 794078- sev: Minor4079"""4080def __init__( self, other, include=False, ignore=None, failOn=None ):4081super( SkipTo, self ).__init__( other )4082self.ignoreExpr = ignore4083self.mayReturnEmpty = True4084self.mayIndexError = False4085self.includeMatch = include4086self.asList = False4087if isinstance(failOn, basestring):4088self.failOn = ParserElement._literalStringClass(failOn)4089else:4090self.failOn = failOn4091self.errmsg = "No match found for "+_ustr(self.expr)40924093def parseImpl( self, instring, loc, doActions=True ):4094startloc = loc4095instrlen = len(instring)4096expr = self.expr4097expr_parse = self.expr._parse4098self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None4099self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None41004101tmploc = loc4102while tmploc <= instrlen:4103if self_failOn_canParseNext is not None:4104# break if failOn expression matches4105if self_failOn_canParseNext(instring, tmploc):4106break41074108if self_ignoreExpr_tryParse is not None:4109# advance past ignore expressions4110while 1:4111try:4112tmploc = self_ignoreExpr_tryParse(instring, tmploc)4113except ParseBaseException:4114break41154116try:4117expr_parse(instring, tmploc, doActions=False, callPreParse=False)4118except (ParseException, IndexError):4119# no match, advance loc in string4120tmploc += 14121else:4122# matched skipto expr, done4123break41244125else:4126# ran off the end of the input string without matching skipto expr, fail4127raise ParseException(instring, loc, self.errmsg, self)41284129# build up return values4130loc = tmploc4131skiptext = instring[startloc:loc]4132skipresult = ParseResults(skiptext)41334134if self.includeMatch:4135loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)4136skipresult += mat41374138return loc, skipresult41394140class Forward(ParseElementEnhance):4141"""4142Forward declaration of an expression to be defined later -4143used for recursive grammars, such as algebraic infix notation.4144When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.41454146Note: take care when assigning to C{Forward} not to overlook precedence of operators.4147Specifically, '|' has a lower precedence than '<<', so that::4148fwdExpr << a | b | c4149will actually be evaluated as::4150(fwdExpr << a) | b | c4151thereby leaving b and c out as parseable alternatives. It is recommended that you4152explicitly group the values inserted into the C{Forward}::4153fwdExpr << (a | b | c)4154Converting to use the '<<=' operator instead will avoid this problem.41554156See L{ParseResults.pprint} for an example of a recursive parser created using4157C{Forward}.4158"""4159def __init__( self, other=None ):4160super(Forward,self).__init__( other, savelist=False )41614162def __lshift__( self, other ):4163if isinstance( other, basestring ):4164other = ParserElement._literalStringClass(other)4165self.expr = other4166self.strRepr = None4167self.mayIndexError = self.expr.mayIndexError4168self.mayReturnEmpty = self.expr.mayReturnEmpty4169self.setWhitespaceChars( self.expr.whiteChars )4170self.skipWhitespace = self.expr.skipWhitespace4171self.saveAsList = self.expr.saveAsList4172self.ignoreExprs.extend(self.expr.ignoreExprs)4173return self41744175def __ilshift__(self, other):4176return self << other41774178def leaveWhitespace( self ):4179self.skipWhitespace = False4180return self41814182def streamline( self ):4183if not self.streamlined:4184self.streamlined = True4185if self.expr is not None:4186self.expr.streamline()4187return self41884189def validate( self, validateTrace=[] ):4190if self not in validateTrace:4191tmp = validateTrace[:]+[self]4192if self.expr is not None:4193self.expr.validate(tmp)4194self.checkRecursion([])41954196def __str__( self ):4197if hasattr(self,"name"):4198return self.name4199return self.__class__.__name__ + ": ..."42004201# stubbed out for now - creates awful memory and perf issues4202self._revertClass = self.__class__4203self.__class__ = _ForwardNoRecurse4204try:4205if self.expr is not None:4206retString = _ustr(self.expr)4207else:4208retString = "None"4209finally:4210self.__class__ = self._revertClass4211return self.__class__.__name__ + ": " + retString42124213def copy(self):4214if self.expr is not None:4215return super(Forward,self).copy()4216else:4217ret = Forward()4218ret <<= self4219return ret42204221class _ForwardNoRecurse(Forward):4222def __str__( self ):4223return "..."42244225class TokenConverter(ParseElementEnhance):4226"""4227Abstract subclass of C{ParseExpression}, for converting parsed results.4228"""4229def __init__( self, expr, savelist=False ):4230super(TokenConverter,self).__init__( expr )#, savelist )4231self.saveAsList = False42324233class Combine(TokenConverter):4234"""4235Converter to concatenate all matching tokens to a single string.4236By default, the matching patterns must also be contiguous in the input string;4237this can be disabled by specifying C{'adjacent=False'} in the constructor.42384239Example::4240real = Word(nums) + '.' + Word(nums)4241print(real.parseString('3.1416')) # -> ['3', '.', '1416']4242# will also erroneously match the following4243print(real.parseString('3. 1416')) # -> ['3', '.', '1416']42444245real = Combine(Word(nums) + '.' + Word(nums))4246print(real.parseString('3.1416')) # -> ['3.1416']4247# no match when there are internal spaces4248print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)4249"""4250def __init__( self, expr, joinString="", adjacent=True ):4251super(Combine,self).__init__( expr )4252# suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself4253if adjacent:4254self.leaveWhitespace()4255self.adjacent = adjacent4256self.skipWhitespace = True4257self.joinString = joinString4258self.callPreparse = True42594260def ignore( self, other ):4261if self.adjacent:4262ParserElement.ignore(self, other)4263else:4264super( Combine, self).ignore( other )4265return self42664267def postParse( self, instring, loc, tokenlist ):4268retToks = tokenlist.copy()4269del retToks[:]4270retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)42714272if self.resultsName and retToks.haskeys():4273return [ retToks ]4274else:4275return retToks42764277class Group(TokenConverter):4278"""4279Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.42804281Example::4282ident = Word(alphas)4283num = Word(nums)4284term = ident | num4285func = ident + Optional(delimitedList(term))4286print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']42874288func = ident + Group(Optional(delimitedList(term)))4289print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]4290"""4291def __init__( self, expr ):4292super(Group,self).__init__( expr )4293self.saveAsList = True42944295def postParse( self, instring, loc, tokenlist ):4296return [ tokenlist ]42974298class Dict(TokenConverter):4299"""4300Converter to return a repetitive expression as a list, but also as a dictionary.4301Each element can also be referenced using the first token in the expression as its key.4302Useful for tabular report scraping when the first column can be used as a item key.43034304Example::4305data_word = Word(alphas)4306label = data_word + FollowedBy(':')4307attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))43084309text = "shape: SQUARE posn: upper left color: light blue texture: burlap"4310attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))43114312# print attributes as plain groups4313print(OneOrMore(attr_expr).parseString(text).dump())43144315# instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names4316result = Dict(OneOrMore(Group(attr_expr))).parseString(text)4317print(result.dump())43184319# access named fields as dict entries, or output as dict4320print(result['shape'])4321print(result.asDict())4322prints::4323['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']43244325[['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]4326- color: light blue4327- posn: upper left4328- shape: SQUARE4329- texture: burlap4330SQUARE4331{'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}4332See more examples at L{ParseResults} of accessing fields by results name.4333"""4334def __init__( self, expr ):4335super(Dict,self).__init__( expr )4336self.saveAsList = True43374338def postParse( self, instring, loc, tokenlist ):4339for i,tok in enumerate(tokenlist):4340if len(tok) == 0:4341continue4342ikey = tok[0]4343if isinstance(ikey,int):4344ikey = _ustr(tok[0]).strip()4345if len(tok)==1:4346tokenlist[ikey] = _ParseResultsWithOffset("",i)4347elif len(tok)==2 and not isinstance(tok[1],ParseResults):4348tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)4349else:4350dictvalue = tok.copy() #ParseResults(i)4351del dictvalue[0]4352if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):4353tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)4354else:4355tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)43564357if self.resultsName:4358return [ tokenlist ]4359else:4360return tokenlist436143624363class Suppress(TokenConverter):4364"""4365Converter for ignoring the results of a parsed expression.43664367Example::4368source = "a, b, c,d"4369wd = Word(alphas)4370wd_list1 = wd + ZeroOrMore(',' + wd)4371print(wd_list1.parseString(source))43724373# often, delimiters that are useful during parsing are just in the4374# way afterward - use Suppress to keep them out of the parsed output4375wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)4376print(wd_list2.parseString(source))4377prints::4378['a', ',', 'b', ',', 'c', ',', 'd']4379['a', 'b', 'c', 'd']4380(See also L{delimitedList}.)4381"""4382def postParse( self, instring, loc, tokenlist ):4383return []43844385def suppress( self ):4386return self438743884389class OnlyOnce(object):4390"""4391Wrapper for parse actions, to ensure they are only called once.4392"""4393def __init__(self, methodCall):4394self.callable = _trim_arity(methodCall)4395self.called = False4396def __call__(self,s,l,t):4397if not self.called:4398results = self.callable(s,l,t)4399self.called = True4400return results4401raise ParseException(s,l,"")4402def reset(self):4403self.called = False44044405def traceParseAction(f):4406"""4407Decorator for debugging parse actions.44084409When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}4410When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.44114412Example::4413wd = Word(alphas)44144415@traceParseAction4416def remove_duplicate_chars(tokens):4417return ''.join(sorted(set(''.join(tokens))))44184419wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)4420print(wds.parseString("slkdjs sld sldd sdlf sdljf"))4421prints::4422>>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))4423<<leaving remove_duplicate_chars (ret: 'dfjkls')4424['dfjkls']4425"""4426f = _trim_arity(f)4427def z(*paArgs):4428thisFunc = f.__name__4429s,l,t = paArgs[-3:]4430if len(paArgs)>3:4431thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc4432sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )4433try:4434ret = f(*paArgs)4435except Exception as exc:4436sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )4437raise4438sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )4439return ret4440try:4441z.__name__ = f.__name__4442except AttributeError:4443pass4444return z44454446#4447# global helpers4448#4449def delimitedList( expr, delim=",", combine=False ):4450"""4451Helper to define a delimited list of expressions - the delimiter defaults to ','.4452By default, the list elements and delimiters can have intervening whitespace, and4453comments, but this can be overridden by passing C{combine=True} in the constructor.4454If C{combine} is set to C{True}, the matching tokens are returned as a single token4455string, with the delimiters included; otherwise, the matching tokens are returned4456as a list of tokens, with the delimiters suppressed.44574458Example::4459delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']4460delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']4461"""4462dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."4463if combine:4464return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)4465else:4466return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)44674468def countedArray( expr, intExpr=None ):4469"""4470Helper to define a counted list of expressions.4471This helper defines a pattern of the form::4472integer expr expr expr...4473where the leading integer tells how many expr expressions follow.4474The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.44754476If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.44774478Example::4479countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']44804481# in this parser, the leading integer value is given in binary,4482# '10' indicating that 2 values are in the array4483binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))4484countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']4485"""4486arrayExpr = Forward()4487def countFieldParseAction(s,l,t):4488n = t[0]4489arrayExpr << (n and Group(And([expr]*n)) or Group(empty))4490return []4491if intExpr is None:4492intExpr = Word(nums).setParseAction(lambda t:int(t[0]))4493else:4494intExpr = intExpr.copy()4495intExpr.setName("arrayLen")4496intExpr.addParseAction(countFieldParseAction, callDuringTry=True)4497return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')44984499def _flatten(L):4500ret = []4501for i in L:4502if isinstance(i,list):4503ret.extend(_flatten(i))4504else:4505ret.append(i)4506return ret45074508def matchPreviousLiteral(expr):4509"""4510Helper to define an expression that is indirectly defined from4511the tokens matched in a previous expression, that is, it looks4512for a 'repeat' of a previous expression. For example::4513first = Word(nums)4514second = matchPreviousLiteral(first)4515matchExpr = first + ":" + second4516will match C{"1:1"}, but not C{"1:2"}. Because this matches a4517previous literal, will also match the leading C{"1:1"} in C{"1:10"}.4518If this is not desired, use C{matchPreviousExpr}.4519Do I{not} use with packrat parsing enabled.4520"""4521rep = Forward()4522def copyTokenToRepeater(s,l,t):4523if t:4524if len(t) == 1:4525rep << t[0]4526else:4527# flatten t tokens4528tflat = _flatten(t.asList())4529rep << And(Literal(tt) for tt in tflat)4530else:4531rep << Empty()4532expr.addParseAction(copyTokenToRepeater, callDuringTry=True)4533rep.setName('(prev) ' + _ustr(expr))4534return rep45354536def matchPreviousExpr(expr):4537"""4538Helper to define an expression that is indirectly defined from4539the tokens matched in a previous expression, that is, it looks4540for a 'repeat' of a previous expression. For example::4541first = Word(nums)4542second = matchPreviousExpr(first)4543matchExpr = first + ":" + second4544will match C{"1:1"}, but not C{"1:2"}. Because this matches by4545expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};4546the expressions are evaluated first, and then compared, so4547C{"1"} is compared with C{"10"}.4548Do I{not} use with packrat parsing enabled.4549"""4550rep = Forward()4551e2 = expr.copy()4552rep <<= e24553def copyTokenToRepeater(s,l,t):4554matchTokens = _flatten(t.asList())4555def mustMatchTheseTokens(s,l,t):4556theseTokens = _flatten(t.asList())4557if theseTokens != matchTokens:4558raise ParseException("",0,"")4559rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )4560expr.addParseAction(copyTokenToRepeater, callDuringTry=True)4561rep.setName('(prev) ' + _ustr(expr))4562return rep45634564def _escapeRegexRangeChars(s):4565#~ escape these chars: ^-]4566for c in r"\^-]":4567s = s.replace(c,_bslash+c)4568s = s.replace("\n",r"\n")4569s = s.replace("\t",r"\t")4570return _ustr(s)45714572def oneOf( strs, caseless=False, useRegex=True ):4573"""4574Helper to quickly define a set of alternative Literals, and makes sure to do4575longest-first testing when there is a conflict, regardless of the input order,4576but returns a C{L{MatchFirst}} for best performance.45774578Parameters:4579- strs - a string of space-delimited literals, or a collection of string literals4580- caseless - (default=C{False}) - treat all literals as caseless4581- useRegex - (default=C{True}) - as an optimization, will generate a Regex4582object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or4583if creating a C{Regex} raises an exception)45844585Example::4586comp_oper = oneOf("< = > <= >= !=")4587var = Word(alphas)4588number = Word(nums)4589term = var | number4590comparison_expr = term + comp_oper + term4591print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12"))4592prints::4593[['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]4594"""4595if caseless:4596isequal = ( lambda a,b: a.upper() == b.upper() )4597masks = ( lambda a,b: b.upper().startswith(a.upper()) )4598parseElementClass = CaselessLiteral4599else:4600isequal = ( lambda a,b: a == b )4601masks = ( lambda a,b: b.startswith(a) )4602parseElementClass = Literal46034604symbols = []4605if isinstance(strs,basestring):4606symbols = strs.split()4607elif isinstance(strs, Iterable):4608symbols = list(strs)4609else:4610warnings.warn("Invalid argument to oneOf, expected string or iterable",4611SyntaxWarning, stacklevel=2)4612if not symbols:4613return NoMatch()46144615i = 04616while i < len(symbols)-1:4617cur = symbols[i]4618for j,other in enumerate(symbols[i+1:]):4619if ( isequal(other, cur) ):4620del symbols[i+j+1]4621break4622elif ( masks(cur, other) ):4623del symbols[i+j+1]4624symbols.insert(i,other)4625cur = other4626break4627else:4628i += 146294630if not caseless and useRegex:4631#~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))4632try:4633if len(symbols)==len("".join(symbols)):4634return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))4635else:4636return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))4637except Exception:4638warnings.warn("Exception creating Regex for oneOf, building MatchFirst",4639SyntaxWarning, stacklevel=2)464046414642# last resort, just use MatchFirst4643return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))46444645def dictOf( key, value ):4646"""4647Helper to easily and clearly define a dictionary by specifying the respective patterns4648for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens4649in the proper order. The key pattern can include delimiting markers or punctuation,4650as long as they are suppressed, thereby leaving the significant key text. The value4651pattern can include named results, so that the C{Dict} results can include named token4652fields.46534654Example::4655text = "shape: SQUARE posn: upper left color: light blue texture: burlap"4656attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))4657print(OneOrMore(attr_expr).parseString(text).dump())46584659attr_label = label4660attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)46614662# similar to Dict, but simpler call format4663result = dictOf(attr_label, attr_value).parseString(text)4664print(result.dump())4665print(result['shape'])4666print(result.shape) # object attribute access works too4667print(result.asDict())4668prints::4669[['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]4670- color: light blue4671- posn: upper left4672- shape: SQUARE4673- texture: burlap4674SQUARE4675SQUARE4676{'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}4677"""4678return Dict( ZeroOrMore( Group ( key + value ) ) )46794680def originalTextFor(expr, asString=True):4681"""4682Helper to return the original, untokenized text for a given expression. Useful to4683restore the parsed fields of an HTML start tag into the raw tag text itself, or to4684revert separate tokens with intervening whitespace back to the original matching4685input text. By default, returns astring containing the original parsed text.46864687If the optional C{asString} argument is passed as C{False}, then the return value is a4688C{L{ParseResults}} containing any results names that were originally matched, and a4689single token containing the original matched text from the input string. So if4690the expression passed to C{L{originalTextFor}} contains expressions with defined4691results names, you must set C{asString} to C{False} if you want to preserve those4692results name values.46934694Example::4695src = "this is test <b> bold <i>text</i> </b> normal text "4696for tag in ("b","i"):4697opener,closer = makeHTMLTags(tag)4698patt = originalTextFor(opener + SkipTo(closer) + closer)4699print(patt.searchString(src)[0])4700prints::4701['<b> bold <i>text</i> </b>']4702['<i>text</i>']4703"""4704locMarker = Empty().setParseAction(lambda s,loc,t: loc)4705endlocMarker = locMarker.copy()4706endlocMarker.callPreparse = False4707matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")4708if asString:4709extractText = lambda s,l,t: s[t._original_start:t._original_end]4710else:4711def extractText(s,l,t):4712t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]4713matchExpr.setParseAction(extractText)4714matchExpr.ignoreExprs = expr.ignoreExprs4715return matchExpr47164717def ungroup(expr):4718"""4719Helper to undo pyparsing's default grouping of And expressions, even4720if all but one are non-empty.4721"""4722return TokenConverter(expr).setParseAction(lambda t:t[0])47234724def locatedExpr(expr):4725"""4726Helper to decorate a returned token with its starting and ending locations in the input string.4727This helper adds the following results names:4728- locn_start = location where matched expression begins4729- locn_end = location where matched expression ends4730- value = the actual parsed results47314732Be careful if the input text contains C{<TAB>} characters, you may want to call4733C{L{ParserElement.parseWithTabs}}47344735Example::4736wd = Word(alphas)4737for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):4738print(match)4739prints::4740[[0, 'ljsdf', 5]]4741[[8, 'lksdjjf', 15]]4742[[18, 'lkkjj', 23]]4743"""4744locator = Empty().setParseAction(lambda s,l,t: l)4745return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))474647474748# convenience constants for positional expressions4749empty = Empty().setName("empty")4750lineStart = LineStart().setName("lineStart")4751lineEnd = LineEnd().setName("lineEnd")4752stringStart = StringStart().setName("stringStart")4753stringEnd = StringEnd().setName("stringEnd")47544755_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])4756_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))4757_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))4758_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)4759_charRange = Group(_singleChar + Suppress("-") + _singleChar)4760_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"47614762def srange(s):4763r"""4764Helper to easily define string ranges for use in Word construction. Borrows4765syntax from regexp '[]' string range definitions::4766srange("[0-9]") -> "0123456789"4767srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"4768srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"4769The input string must be enclosed in []'s, and the returned string is the expanded4770character set joined into a single string.4771The values enclosed in the []'s may be:4772- a single character4773- an escaped character with a leading backslash (such as C{\-} or C{\]})4774- an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)4775(C{\0x##} is also supported for backwards compatibility)4776- an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)4777- a range of any of the above, separated by a dash (C{'a-z'}, etc.)4778- any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)4779"""4780_expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))4781try:4782return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)4783except Exception:4784return ""47854786def matchOnlyAtCol(n):4787"""4788Helper method for defining parse actions that require matching at a specific4789column in the input text.4790"""4791def verifyCol(strg,locn,toks):4792if col(locn,strg) != n:4793raise ParseException(strg,locn,"matched token not at column %d" % n)4794return verifyCol47954796def replaceWith(replStr):4797"""4798Helper method for common parse actions that simply return a literal value. Especially4799useful when used with C{L{transformString<ParserElement.transformString>}()}.48004801Example::4802num = Word(nums).setParseAction(lambda toks: int(toks[0]))4803na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))4804term = na | num48054806OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]4807"""4808return lambda s,l,t: [replStr]48094810def removeQuotes(s,l,t):4811"""4812Helper parse action for removing quotation marks from parsed quoted strings.48134814Example::4815# by default, quotation marks are included in parsed results4816quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]48174818# use removeQuotes to strip quotation marks from parsed results4819quotedString.setParseAction(removeQuotes)4820quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]4821"""4822return t[0][1:-1]48234824def tokenMap(func, *args):4825"""4826Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional4827args are passed, they are forwarded to the given function as additional arguments after4828the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the4829parsed data to an integer using base 16.48304831Example (compare the last to example in L{ParserElement.transformString}::4832hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))4833hex_ints.runTests('''483400 11 22 aa FF 0a 0d 1a4835''')48364837upperword = Word(alphas).setParseAction(tokenMap(str.upper))4838OneOrMore(upperword).runTests('''4839my kingdom for a horse4840''')48414842wd = Word(alphas).setParseAction(tokenMap(str.title))4843OneOrMore(wd).setParseAction(' '.join).runTests('''4844now is the winter of our discontent made glorious summer by this sun of york4845''')4846prints::484700 11 22 aa FF 0a 0d 1a4848[0, 17, 34, 170, 255, 10, 13, 26]48494850my kingdom for a horse4851['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']48524853now is the winter of our discontent made glorious summer by this sun of york4854['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']4855"""4856def pa(s,l,t):4857return [func(tokn, *args) for tokn in t]48584859try:4860func_name = getattr(func, '__name__',4861getattr(func, '__class__').__name__)4862except Exception:4863func_name = str(func)4864pa.__name__ = func_name48654866return pa48674868upcaseTokens = tokenMap(lambda t: _ustr(t).upper())4869"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""48704871downcaseTokens = tokenMap(lambda t: _ustr(t).lower())4872"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""48734874def _makeTags(tagStr, xml):4875"""Internal helper to construct opening and closing tag expressions, given a tag name"""4876if isinstance(tagStr,basestring):4877resname = tagStr4878tagStr = Keyword(tagStr, caseless=not xml)4879else:4880resname = tagStr.name48814882tagAttrName = Word(alphas,alphanums+"_-:")4883if (xml):4884tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )4885openTag = Suppress("<") + tagStr("tag") + \4886Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \4887Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")4888else:4889printablesLessRAbrack = "".join(c for c in printables if c not in ">")4890tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)4891openTag = Suppress("<") + tagStr("tag") + \4892Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \4893Optional( Suppress("=") + tagAttrValue ) ))) + \4894Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")4895closeTag = Combine(_L("</") + tagStr + ">")48964897openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)4898closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)4899openTag.tag = resname4900closeTag.tag = resname4901return openTag, closeTag49024903def makeHTMLTags(tagStr):4904"""4905Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches4906tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.49074908Example::4909text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'4910# makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple4911a,a_end = makeHTMLTags("A")4912link_expr = a + SkipTo(a_end)("link_text") + a_end49134914for link in link_expr.searchString(text):4915# attributes in the <A> tag (like "href" shown here) are also accessible as named results4916print(link.link_text, '->', link.href)4917prints::4918pyparsing -> http://pyparsing.wikispaces.com4919"""4920return _makeTags( tagStr, False )49214922def makeXMLTags(tagStr):4923"""4924Helper to construct opening and closing tag expressions for XML, given a tag name. Matches4925tags only in the given upper/lower case.49264927Example: similar to L{makeHTMLTags}4928"""4929return _makeTags( tagStr, True )49304931def withAttribute(*args,**attrDict):4932"""4933Helper to create a validating parse action to be used with start tags created4934with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag4935with a required attribute value, to avoid false matches on common tags such as4936C{<TD>} or C{<DIV>}.49374938Call C{withAttribute} with a series of attribute names and values. Specify the list4939of filter attributes names and values as:4940- keyword arguments, as in C{(align="right")}, or4941- as an explicit dict with C{**} operator, when an attribute name is also a Python4942reserved word, as in C{**{"class":"Customer", "align":"right"}}4943- a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )4944For attribute names with a namespace prefix, you must use the second form. Attribute4945names are matched insensitive to upper/lower case.49464947If just testing for C{class} (with or without a namespace), use C{L{withClass}}.49484949To verify that the attribute exists, but without specifying a value, pass4950C{withAttribute.ANY_VALUE} as the value.49514952Example::4953html = '''4954<div>4955Some text4956<div type="grid">1 4 0 1 0</div>4957<div type="graph">1,3 2,3 1,1</div>4958<div>this has no type</div>4959</div>49604961'''4962div,div_end = makeHTMLTags("div")49634964# only match div tag having a type attribute with value "grid"4965div_grid = div().setParseAction(withAttribute(type="grid"))4966grid_expr = div_grid + SkipTo(div | div_end)("body")4967for grid_header in grid_expr.searchString(html):4968print(grid_header.body)49694970# construct a match with any div tag having a type attribute, regardless of the value4971div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))4972div_expr = div_any_type + SkipTo(div | div_end)("body")4973for div_header in div_expr.searchString(html):4974print(div_header.body)4975prints::49761 4 0 1 0497749781 4 0 1 049791,3 2,3 1,14980"""4981if args:4982attrs = args[:]4983else:4984attrs = attrDict.items()4985attrs = [(k,v) for k,v in attrs]4986def pa(s,l,tokens):4987for attrName,attrValue in attrs:4988if attrName not in tokens:4989raise ParseException(s,l,"no matching attribute " + attrName)4990if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:4991raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %4992(attrName, tokens[attrName], attrValue))4993return pa4994withAttribute.ANY_VALUE = object()49954996def withClass(classname, namespace=''):4997"""4998Simplified version of C{L{withAttribute}} when matching on a div class - made4999difficult because C{class} is a reserved word in Python.50005001Example::5002html = '''5003<div>5004Some text5005<div class="grid">1 4 0 1 0</div>5006<div class="graph">1,3 2,3 1,1</div>5007<div>this <div> has no class</div>5008</div>50095010'''5011div,div_end = makeHTMLTags("div")5012div_grid = div().setParseAction(withClass("grid"))50135014grid_expr = div_grid + SkipTo(div | div_end)("body")5015for grid_header in grid_expr.searchString(html):5016print(grid_header.body)50175018div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))5019div_expr = div_any_type + SkipTo(div | div_end)("body")5020for div_header in div_expr.searchString(html):5021print(div_header.body)5022prints::50231 4 0 1 0502450251 4 0 1 050261,3 2,3 1,15027"""5028classattr = "%s:class" % namespace if namespace else "class"5029return withAttribute(**{classattr : classname})50305031opAssoc = _Constants()5032opAssoc.LEFT = object()5033opAssoc.RIGHT = object()50345035def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):5036"""5037Helper method for constructing grammars of expressions made up of5038operators working in a precedence hierarchy. Operators may be unary or5039binary, left- or right-associative. Parse actions can also be attached5040to operator expressions. The generated parser will also recognize the use5041of parentheses to override operator precedences (see example below).50425043Note: if you define a deep operator list, you may see performance issues5044when using infixNotation. See L{ParserElement.enablePackrat} for a5045mechanism to potentially improve your parser performance.50465047Parameters:5048- baseExpr - expression representing the most basic element for the nested5049- opList - list of tuples, one for each operator precedence level in the5050expression grammar; each tuple is of the form5051(opExpr, numTerms, rightLeftAssoc, parseAction), where:5052- opExpr is the pyparsing expression for the operator;5053may also be a string, which will be converted to a Literal;5054if numTerms is 3, opExpr is a tuple of two expressions, for the5055two operators separating the 3 terms5056- numTerms is the number of terms for this operator (must5057be 1, 2, or 3)5058- rightLeftAssoc is the indicator whether the operator is5059right or left associative, using the pyparsing-defined5060constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.5061- parseAction is the parse action to be associated with5062expressions matching this operator expression (the5063parse action tuple member may be omitted); if the parse action5064is passed a tuple or list of functions, this is equivalent to5065calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})5066- lpar - expression for matching left-parentheses (default=C{Suppress('(')})5067- rpar - expression for matching right-parentheses (default=C{Suppress(')')})50685069Example::5070# simple example of four-function arithmetic with ints and variable names5071integer = pyparsing_common.signed_integer5072varname = pyparsing_common.identifier50735074arith_expr = infixNotation(integer | varname,5075[5076('-', 1, opAssoc.RIGHT),5077(oneOf('* /'), 2, opAssoc.LEFT),5078(oneOf('+ -'), 2, opAssoc.LEFT),5079])50805081arith_expr.runTests('''50825+3*65083(5+3)*65084-2--115085''', fullDump=False)5086prints::50875+3*65088[[5, '+', [3, '*', 6]]]50895090(5+3)*65091[[[5, '+', 3], '*', 6]]50925093-2--115094[[['-', 2], '-', ['-', 11]]]5095"""5096ret = Forward()5097lastExpr = baseExpr | ( lpar + ret + rpar )5098for i,operDef in enumerate(opList):5099opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]5100termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr5101if arity == 3:5102if opExpr is None or len(opExpr) != 2:5103raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")5104opExpr1, opExpr2 = opExpr5105thisExpr = Forward().setName(termName)5106if rightLeftAssoc == opAssoc.LEFT:5107if arity == 1:5108matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )5109elif arity == 2:5110if opExpr is not None:5111matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )5112else:5113matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )5114elif arity == 3:5115matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \5116Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )5117else:5118raise ValueError("operator must be unary (1), binary (2), or ternary (3)")5119elif rightLeftAssoc == opAssoc.RIGHT:5120if arity == 1:5121# try to avoid LR with this extra test5122if not isinstance(opExpr, Optional):5123opExpr = Optional(opExpr)5124matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )5125elif arity == 2:5126if opExpr is not None:5127matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )5128else:5129matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )5130elif arity == 3:5131matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \5132Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )5133else:5134raise ValueError("operator must be unary (1), binary (2), or ternary (3)")5135else:5136raise ValueError("operator must indicate right or left associativity")5137if pa:5138if isinstance(pa, (tuple, list)):5139matchExpr.setParseAction(*pa)5140else:5141matchExpr.setParseAction(pa)5142thisExpr <<= ( matchExpr.setName(termName) | lastExpr )5143lastExpr = thisExpr5144ret <<= lastExpr5145return ret51465147operatorPrecedence = infixNotation5148"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""51495150dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")5151sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")5152quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|5153Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")5154unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")51555156def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):5157"""5158Helper method for defining nested lists enclosed in opening and closing5159delimiters ("(" and ")" are the default).51605161Parameters:5162- opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression5163- closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression5164- content - expression for items within the nested lists (default=C{None})5165- ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})51665167If an expression is not provided for the content argument, the nested5168expression will capture all whitespace-delimited content between delimiters5169as a list of separate values.51705171Use the C{ignoreExpr} argument to define expressions that may contain5172opening or closing characters that should not be treated as opening5173or closing characters for nesting, such as quotedString or a comment5174expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.5175The default is L{quotedString}, but if no expressions are to be ignored,5176then pass C{None} for this argument.51775178Example::5179data_type = oneOf("void int short long char float double")5180decl_data_type = Combine(data_type + Optional(Word('*')))5181ident = Word(alphas+'_', alphanums+'_')5182number = pyparsing_common.number5183arg = Group(decl_data_type + ident)5184LPAR,RPAR = map(Suppress, "()")51855186code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))51875188c_function = (decl_data_type("type")5189+ ident("name")5190+ LPAR + Optional(delimitedList(arg), [])("args") + RPAR5191+ code_body("body"))5192c_function.ignore(cStyleComment)51935194source_code = '''5195int is_odd(int x) {5196return (x%2);5197}51985199int dec_to_hex(char hchar) {5200if (hchar >= '0' && hchar <= '9') {5201return (ord(hchar)-ord('0'));5202} else {5203return (10+ord(hchar)-ord('A'));5204}5205}5206'''5207for func in c_function.searchString(source_code):5208print("%(name)s (%(type)s) args: %(args)s" % func)52095210prints::5211is_odd (int) args: [['int', 'x']]5212dec_to_hex (int) args: [['char', 'hchar']]5213"""5214if opener == closer:5215raise ValueError("opening and closing strings cannot be the same")5216if content is None:5217if isinstance(opener,basestring) and isinstance(closer,basestring):5218if len(opener) == 1 and len(closer)==1:5219if ignoreExpr is not None:5220content = (Combine(OneOrMore(~ignoreExpr +5221CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))5222).setParseAction(lambda t:t[0].strip()))5223else:5224content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS5225).setParseAction(lambda t:t[0].strip()))5226else:5227if ignoreExpr is not None:5228content = (Combine(OneOrMore(~ignoreExpr +5229~Literal(opener) + ~Literal(closer) +5230CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))5231).setParseAction(lambda t:t[0].strip()))5232else:5233content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +5234CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))5235).setParseAction(lambda t:t[0].strip()))5236else:5237raise ValueError("opening and closing arguments must be strings if no content expression is given")5238ret = Forward()5239if ignoreExpr is not None:5240ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )5241else:5242ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )5243ret.setName('nested %s%s expression' % (opener,closer))5244return ret52455246def indentedBlock(blockStatementExpr, indentStack, indent=True):5247"""5248Helper method for defining space-delimited indentation blocks, such as5249those used to define block statements in Python source code.52505251Parameters:5252- blockStatementExpr - expression defining syntax of statement that5253is repeated within the indented block5254- indentStack - list created by caller to manage indentation stack5255(multiple statementWithIndentedBlock expressions within a single grammar5256should share a common indentStack)5257- indent - boolean indicating whether block must be indented beyond the5258the current level; set to False for block of left-most statements5259(default=C{True})52605261A valid block must contain at least one C{blockStatement}.52625263Example::5264data = '''5265def A(z):5266A15267B = 1005268G = A25269A25270A35271B5272def BB(a,b,c):5273BB15274def BBA():5275bba15276bba25277bba35278C5279D5280def spam(x,y):5281def eggs(z):5282pass5283'''528452855286indentStack = [1]5287stmt = Forward()52885289identifier = Word(alphas, alphanums)5290funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")5291func_body = indentedBlock(stmt, indentStack)5292funcDef = Group( funcDecl + func_body )52935294rvalue = Forward()5295funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")5296rvalue << (funcCall | identifier | Word(nums))5297assignment = Group(identifier + "=" + rvalue)5298stmt << ( funcDef | assignment | identifier )52995300module_body = OneOrMore(stmt)53015302parseTree = module_body.parseString(data)5303parseTree.pprint()5304prints::5305[['def',5306'A',5307['(', 'z', ')'],5308':',5309[['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],5310'B',5311['def',5312'BB',5313['(', 'a', 'b', 'c', ')'],5314':',5315[['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],5316'C',5317'D',5318['def',5319'spam',5320['(', 'x', 'y', ')'],5321':',5322[[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]5323"""5324def checkPeerIndent(s,l,t):5325if l >= len(s): return5326curCol = col(l,s)5327if curCol != indentStack[-1]:5328if curCol > indentStack[-1]:5329raise ParseFatalException(s,l,"illegal nesting")5330raise ParseException(s,l,"not a peer entry")53315332def checkSubIndent(s,l,t):5333curCol = col(l,s)5334if curCol > indentStack[-1]:5335indentStack.append( curCol )5336else:5337raise ParseException(s,l,"not a subentry")53385339def checkUnindent(s,l,t):5340if l >= len(s): return5341curCol = col(l,s)5342if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):5343raise ParseException(s,l,"not an unindent")5344indentStack.pop()53455346NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())5347INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')5348PEER = Empty().setParseAction(checkPeerIndent).setName('')5349UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')5350if indent:5351smExpr = Group( Optional(NL) +5352#~ FollowedBy(blockStatementExpr) +5353INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)5354else:5355smExpr = Group( Optional(NL) +5356(OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )5357blockStatementExpr.ignore(_bslash + LineEnd())5358return smExpr.setName('indented block')53595360alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")5361punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")53625363anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))5364_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))5365commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")5366def replaceHTMLEntity(t):5367"""Helper parser action to replace common HTML entities with their special characters"""5368return _htmlEntityMap.get(t.entity)53695370# it's easy to get these comment structures wrong - they're very common, so may as well make them available5371cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")5372"Comment of the form C{/* ... */}"53735374htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")5375"Comment of the form C{<!-- ... -->}"53765377restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")5378dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")5379"Comment of the form C{// ... (to end of line)}"53805381cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")5382"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"53835384javaStyleComment = cppStyleComment5385"Same as C{L{cppStyleComment}}"53865387pythonStyleComment = Regex(r"#.*").setName("Python style comment")5388"Comment of the form C{# ... (to end of line)}"53895390_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +5391Optional( Word(" \t") +5392~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")5393commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")5394"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.5395This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""53965397# some other useful expressions - using lower-case class name since we are really using this as a namespace5398class pyparsing_common:5399"""5400Here are some common low-level expressions that may be useful in jump-starting parser development:5401- numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})5402- common L{programming identifiers<identifier>}5403- network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})5404- ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}5405- L{UUID<uuid>}5406- L{comma-separated list<comma_separated_list>}5407Parse actions:5408- C{L{convertToInteger}}5409- C{L{convertToFloat}}5410- C{L{convertToDate}}5411- C{L{convertToDatetime}}5412- C{L{stripHTMLTags}}5413- C{L{upcaseTokens}}5414- C{L{downcaseTokens}}54155416Example::5417pyparsing_common.number.runTests('''5418# any int or real number, returned as the appropriate type54191005420-1005421+10054223.1415954236.02e2354241e-125425''')54265427pyparsing_common.fnumber.runTests('''5428# any int or real number, returned as float54291005430-1005431+10054323.1415954336.02e2354341e-125435''')54365437pyparsing_common.hex_integer.runTests('''5438# hex numbers54391005440FF5441''')54425443pyparsing_common.fraction.runTests('''5444# fractions54451/25446-3/45447''')54485449pyparsing_common.mixed_integer.runTests('''5450# mixed fractions5451154521/25453-3/454541-3/45455''')54565457import uuid5458pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))5459pyparsing_common.uuid.runTests('''5460# uuid546112345678-1234-5678-1234-5678123456785462''')5463prints::5464# any int or real number, returned as the appropriate type54651005466[100]54675468-1005469[-100]54705471+1005472[100]547354743.141595475[3.14159]547654776.02e235478[6.02e+23]547954801e-125481[1e-12]54825483# any int or real number, returned as float54841005485[100.0]54865487-1005488[-100.0]54895490+1005491[100.0]549254933.141595494[3.14159]549554966.02e235497[6.02e+23]549854991e-125500[1e-12]55015502# hex numbers55031005504[256]55055506FF5507[255]55085509# fractions55101/25511[0.5]55125513-3/45514[-0.75]55155516# mixed fractions551715518[1]551955201/25521[0.5]55225523-3/45524[-0.75]552555261-3/45527[1.75]55285529# uuid553012345678-1234-5678-1234-5678123456785531[UUID('12345678-1234-5678-1234-567812345678')]5532"""55335534convertToInteger = tokenMap(int)5535"""5536Parse action for converting parsed integers to Python int5537"""55385539convertToFloat = tokenMap(float)5540"""5541Parse action for converting parsed numbers to Python float5542"""55435544integer = Word(nums).setName("integer").setParseAction(convertToInteger)5545"""expression that parses an unsigned integer, returns an int"""55465547hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))5548"""expression that parses a hexadecimal integer, returns an int"""55495550signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)5551"""expression that parses an integer with optional leading sign, returns an int"""55525553fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")5554"""fractional expression of an integer divided by an integer, returns a float"""5555fraction.addParseAction(lambda t: t[0]/t[-1])55565557mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")5558"""mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""5559mixed_integer.addParseAction(sum)55605561real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)5562"""expression that parses a floating point number and returns a float"""55635564sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)5565"""expression that parses a floating point number with optional scientific notation and returns a float"""55665567# streamlining this expression makes the docs nicer-looking5568number = (sci_real | real | signed_integer).streamline()5569"""any numeric expression, returns the corresponding Python type"""55705571fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)5572"""any int or real number, returned as float"""55735574identifier = Word(alphas+'_', alphanums+'_').setName("identifier")5575"""typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""55765577ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")5578"IPv4 address (C{0.0.0.0 - 255.255.255.255})"55795580_ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")5581_full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")5582_short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")5583_short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)5584_mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")5585ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")5586"IPv6 address (long, short, or mixed form)"55875588mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")5589"MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"55905591@staticmethod5592def convertToDate(fmt="%Y-%m-%d"):5593"""5594Helper to create a parse action for converting parsed date string to Python datetime.date55955596Params -5597- fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})55985599Example::5600date_expr = pyparsing_common.iso8601_date.copy()5601date_expr.setParseAction(pyparsing_common.convertToDate())5602print(date_expr.parseString("1999-12-31"))5603prints::5604[datetime.date(1999, 12, 31)]5605"""5606def cvt_fn(s,l,t):5607try:5608return datetime.strptime(t[0], fmt).date()5609except ValueError as ve:5610raise ParseException(s, l, str(ve))5611return cvt_fn56125613@staticmethod5614def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):5615"""5616Helper to create a parse action for converting parsed datetime string to Python datetime.datetime56175618Params -5619- fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})56205621Example::5622dt_expr = pyparsing_common.iso8601_datetime.copy()5623dt_expr.setParseAction(pyparsing_common.convertToDatetime())5624print(dt_expr.parseString("1999-12-31T23:59:59.999"))5625prints::5626[datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]5627"""5628def cvt_fn(s,l,t):5629try:5630return datetime.strptime(t[0], fmt)5631except ValueError as ve:5632raise ParseException(s, l, str(ve))5633return cvt_fn56345635iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")5636"ISO8601 date (C{yyyy-mm-dd})"56375638iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")5639"ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"56405641uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")5642"UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"56435644_html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()5645@staticmethod5646def stripHTMLTags(s, l, tokens):5647"""5648Parse action to remove HTML tags from web page HTML source56495650Example::5651# strip HTML links from normal text5652text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'5653td,td_end = makeHTMLTags("TD")5654table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end56555656print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'5657"""5658return pyparsing_common._html_stripper.transformString(tokens[0])56595660_commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')5661+ Optional( White(" \t") ) ) ).streamline().setName("commaItem")5662comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")5663"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""56645665upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))5666"""Parse action to convert tokens to upper case."""56675668downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))5669"""Parse action to convert tokens to lower case."""567056715672if __name__ == "__main__":56735674selectToken = CaselessLiteral("select")5675fromToken = CaselessLiteral("from")56765677ident = Word(alphas, alphanums + "_$")56785679columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)5680columnNameList = Group(delimitedList(columnName)).setName("columns")5681columnSpec = ('*' | columnNameList)56825683tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)5684tableNameList = Group(delimitedList(tableName)).setName("tables")56855686simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")56875688# demo runTests method, including embedded comments in test string5689simpleSQL.runTests("""5690# '*' as column list and dotted table name5691select * from SYS.XYZZY56925693# caseless match on "SELECT", and casts back to "select"5694SELECT * from XYZZY, ABC56955696# list of column names, and mixed case SELECT keyword5697Select AA,BB,CC from Sys.dual56985699# multiple tables5700Select A, B, C from Sys.dual, Table257015702# invalid SELECT keyword - should fail5703Xelect A, B, C from Sys.dual57045705# incomplete command - should fail5706Select57075708# invalid column name - should fail5709Select ^^^ frox Sys.dual57105711""")57125713pyparsing_common.number.runTests("""57141005715-1005716+10057173.1415957186.02e2357191e-125720""")57215722# any int or real number, returned as float5723pyparsing_common.fnumber.runTests("""57241005725-1005726+10057273.1415957286.02e2357291e-125730""")57315732pyparsing_common.hex_integer.runTests("""57331005734FF5735""")57365737import uuid5738pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))5739pyparsing_common.uuid.runTests("""574012345678-1234-5678-1234-5678123456785741""")574257435744