CoCalc -- pyparsing.py

GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/setuptools/_vendor/pyparsing.py
⁸¹¹ views
1
# module pyparsing.py
2
#
3
# Copyright (c) 2003-2018  Paul T. McGuire
4
#
5
# Permission is hereby granted, free of charge, to any person obtaining
6
# a copy of this software and associated documentation files (the
7
# "Software"), to deal in the Software without restriction, including
8
# without limitation the rights to use, copy, modify, merge, publish,
9
# distribute, sublicense, and/or sell copies of the Software, and to
10
# permit persons to whom the Software is furnished to do so, subject to
11
# the following conditions:
12
#
13
# The above copyright notice and this permission notice shall be
14
# included in all copies or substantial portions of the Software.
15
#
16
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
#
24

25
__doc__ = \
26
"""
27
pyparsing module - Classes and methods to define and execute parsing grammars
28
=============================================================================
29

30
The pyparsing module is an alternative approach to creating and executing simple grammars,
31
vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
32
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33
provides a library of classes that you use to construct the grammar directly in Python.
34

35
Here is a program to parse "Hello, World!" (or any greeting of the form 
36
C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements 
37
(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
38
L{Literal} expressions)::
39

40
    from pyparsing import Word, alphas
41

42
    # define grammar of a greeting
43
    greet = Word(alphas) + "," + Word(alphas) + "!"
44

45
    hello = "Hello, World!"
46
    print (hello, "->", greet.parseString(hello))
47

48
The program outputs the following::
49

50
    Hello, World! -> ['Hello', ',', 'World', '!']
51

52
The Python representation of the grammar is quite readable, owing to the self-explanatory
53
class names, and the use of '+', '|' and '^' operators.
54

55
The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
56
object with named attributes.
57

58
The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
59
 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
60
 - quoted strings
61
 - embedded comments
62

63

64
Getting Started -
65
-----------------
66
Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
67
classes inherit from. Use the docstrings for examples of how to:
68
 - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
69
 - construct character word-group expressions using the L{Word} class
70
 - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
71
 - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
72
 - associate names with your parsed results using L{ParserElement.setResultsName}
73
 - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
74
 - find more useful common expressions in the L{pyparsing_common} namespace class
75
"""
76

77
# Release identification for this vendored copy of pyparsing.
__version__ = "2.2.1"
__versionTime__ = "18 Sep 2018 00:49 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
80

81
import string
82
from weakref import ref as wkref
83
import copy
84
import sys
85
import warnings
86
import re
87
import sre_constants
88
import collections
89
import pprint
90
import traceback
91
import types
92
from datetime import datetime
93

94
try:
95
    from _thread import RLock
96
except ImportError:
97
    from threading import RLock
98

99
try:
100
    # Python 3
101
    from collections.abc import Iterable
102
    from collections.abc import MutableMapping
103
except ImportError:
104
    # Python 2.7
105
    from collections import Iterable
106
    from collections import MutableMapping
107

108
try:
109
    from collections import OrderedDict as _OrderedDict
110
except ImportError:
111
    try:
112
        from ordereddict import OrderedDict as _OrderedDict
113
    except ImportError:
114
        _OrderedDict = None
115

116
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
117

118
# Public names exported by "from pyparsing import *".
__all__ = [
    # expression classes and exceptions
    'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
    'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
    'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
    'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression',
    'ParseFatalException',
    'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString',
    'RecursiveGrammarException',
    'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
    'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
    # helper functions, constants and predefined expressions
    'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
    'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment',
    'dblQuotedString',
    'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
    'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
    'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr',
    'matchPreviousLiteral',
    'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence',
    'printables',
    'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
    'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
    'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
    'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass',
    'CloseMatch', 'tokenMap', 'pyparsing_common',
]
138

139
# Interpreter detection and Python 2 / Python 3 compatibility names.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3

if not PY_3:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
           str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
           then < returns the unicode object | encodes it with the default encoding | ... >.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # When str() succeeds, _ustr(obj) behaves exactly like str(obj),
            # so existing code is unaffected.
            return str(obj)
        except UnicodeEncodeError:
            # Otherwise encode with XML character references, then rewrite each
            # reference as a backslash-u escape.
            encoded = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            xmlcharref = Regex(r'&#\d+;')
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(encoded)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__, fname))
        except AttributeError:
            # this builtin does not exist on the running interpreter; skip it
            continue
else:
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

# the type of generator-expression objects, used for isinstance checks
_generatorType = type((y for y in range(1)))
184
 
185
def _xml_escape(data):
    """Return C{data} with the characters &, >, <, " and ' replaced by XML entity references."""
    # '&' must be handled first; otherwise the ampersands introduced by the
    # other entity references would themselves be escaped again
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for symbol, entity in replacements:
        data = data.replace(symbol, entity)
    return data
194

195
class _Constants(object):
    """Empty placeholder class; it defines no attributes or behavior of its own."""
197

198
# Character sets commonly used when defining Word expressions.
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
204

205
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is not None:
            self.msg, self.pstr = msg, pstr
        else:
            # single-argument form: the lone positional value is the message
            # and there is no input string
            self.msg, self.pstr = pstr, ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col (or column) - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        split_at = self.column - 1
        if markerString:
            before, after = line_str[:split_at], line_str[split_at:]
            line_str = "".join((before, markerString, after))
        return line_str.strip()

    def __dir__(self):
        # advertise the computed attributes in addition to the class's own names
        return ["lineno", "col", "line"] + dir(type(self))
260

261
class ParseException(ParseBaseException):
    """
    Exception thrown when parse expressions don't match class;
    supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
        Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
281

282
class ParseFatalException(ParseBaseException):
    """
    user-throwable exception thrown when inconsistent parse content
    is found; stops all parsing immediately
    """
286

287
class ParseSyntaxException(ParseFatalException):
    """
    just like L{ParseFatalException}, but thrown internally when an
    L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
    immediately because an unbacktrackable syntax error has been found
    """
292

293
#~ class ReparseException(ParseBaseException):
294
    #~ """Experimental class - parse actions can raise this exception to cause
295
       #~ pyparsing to reparse the input string:
296
        #~ - with a modified input string, and/or
297
        #~ - with a modified start location
298
       #~ Set the values of the ReparseException in the constructor, and raise the
299
       #~ exception in a parse action to cause pyparsing to use the new string/location.
300
       #~ Setting the values as None causes no change to be made.
301
       #~ """
302
    #~ def __init_( self, newstring, restartLoc ):
303
        #~ self.newParseText = newstring
304
        #~ self.reparseLoc = restartLoc
305

306
class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # the trace is kept as given and rendered by __str__
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a tuple-valued trace would otherwise be
        # unpacked as several '%' arguments and raise TypeError.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
313

314
class _ParseResultsWithOffset(object):
    """Pairs a value with an integer offset; behaves like a 2-item sequence C{(value, offset)}."""

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        # index 0 is the value, index 1 the offset; any other index raises
        # IndexError, which is what lets instances be unpacked like a pair
        return self.tup[i]

    def __repr__(self):
        # only the value is shown; the offset is omitted
        value = self.tup[0]
        return repr(value)

    def setOffset(self, i):
        """Replace the stored offset with C{i}, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)
323

324
class ParseResults(object):
325
    """
326
    Structured parse results, to provide multiple means of access to the parsed data:
327
       - as a list (C{len(results)})
328
       - by list index (C{results[0], results[1]}, etc.)
329
       - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
330

331
    Example::
332
        integer = Word(nums)
333
        date_str = (integer.setResultsName("year") + '/' 
334
                        + integer.setResultsName("month") + '/' 
335
                        + integer.setResultsName("day"))
336
        # equivalent form:
337
        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
338

339
        # parseString returns a ParseResults object
340
        result = date_str.parseString("1999/12/31")
341

342
        def test(s, fn=repr):
343
            print("%s -> %s" % (s, fn(eval(s))))
344
        test("list(result)")
345
        test("result[0]")
346
        test("result['month']")
347
        test("result.day")
348
        test("'month' in result")
349
        test("'minutes' in result")
350
        test("result.dump()", str)
351
    prints::
352
        list(result) -> ['1999', '/', '12', '/', '31']
353
        result[0] -> '1999'
354
        result['month'] -> '12'
355
        result.day -> '31'
356
        'month' in result -> True
357
        'minutes' in result -> False
358
        result.dump() -> ['1999', '/', '12', '/', '31']
359
        - day: 31
360
        - month: 12
361
        - year: 1999
362
    """
363
    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
364
        if isinstance(toklist, cls):
365
            return toklist
366
        retobj = object.__new__(cls)
367
        retobj.__doinit = True
368
        return retobj
369

370
    # Performance tuning: we construct a *lot* of these, so keep this
371
    # constructor as small and fast as possible
372
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
373
        if self.__doinit:
374
            self.__doinit = False
375
            self.__name = None
376
            self.__parent = None
377
            self.__accumNames = {}
378
            self.__asList = asList
379
            self.__modal = modal
380
            if toklist is None:
381
                toklist = []
382
            if isinstance(toklist, list):
383
                self.__toklist = toklist[:]
384
            elif isinstance(toklist, _generatorType):
385
                self.__toklist = list(toklist)
386
            else:
387
                self.__toklist = [toklist]
388
            self.__tokdict = dict()
389

390
        if name is not None and name:
391
            if not modal:
392
                self.__accumNames[name] = 0
393
            if isinstance(name,int):
394
                name = _ustr(name) # will always return a str, but use _ustr for consistency
395
            self.__name = name
396
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
397
                if isinstance(toklist,basestring):
398
                    toklist = [ toklist ]
399
                if asList:
400
                    if isinstance(toklist,ParseResults):
401
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
402
                    else:
403
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
404
                    self[name].__name = name
405
                else:
406
                    try:
407
                        self[name] = toklist[0]
408
                    except (KeyError,TypeError,IndexError):
409
                        self[name] = toklist
410

411
    def __getitem__( self, i ):
412
        if isinstance( i, (int,slice) ):
413
            return self.__toklist[i]
414
        else:
415
            if i not in self.__accumNames:
416
                return self.__tokdict[i][-1][0]
417
            else:
418
                return ParseResults([ v[0] for v in self.__tokdict[i] ])
419

420
    def __setitem__( self, k, v, isinstance=isinstance ):
        """
        Store C{v} under C{k}: a C{_ParseResultsWithOffset} is appended to the
        occurrences of results name C{k}; an int/slice key assigns into the token
        list; any other key records C{v} as a named result at offset 0.
        """
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            child = v[0]
        elif isinstance(k,(int,slice)):
            self.__toklist[k] = v
            child = v
        else:
            wrapped = _ParseResultsWithOffset(v,0)
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [wrapped]
            child = v
        # nested results keep a weak reference back to their container
        if isinstance(child,ParseResults):
            child.__parent = wkref(self)
432

433
    def __delitem__( self, i ):
434
        if isinstance(i,(int,slice)):
435
            mylen = len( self.__toklist )
436
            del self.__toklist[i]
437

438
            # convert int to slice
439
            if isinstance(i, int):
440
                if i < 0:
441
                    i += mylen
442
                i = slice(i, i+1)
443
            # get removed indices
444
            removed = list(range(*i.indices(mylen)))
445
            removed.reverse()
446
            # fixup indices in token dictionary
447
            for name,occurrences in self.__tokdict.items():
448
                for j in removed:
449
                    for k, (value, position) in enumerate(occurrences):
450
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
451
        else:
452
            del self.__tokdict[i]
453

454
    def __contains__( self, k ):
455
        return k in self.__tokdict
456

457
    def __len__( self ): return len( self.__toklist )
458
    def __bool__(self): return ( not not self.__toklist )
459
    __nonzero__ = __bool__
460
    def __iter__( self ): return iter( self.__toklist )
461
    def __reversed__( self ): return iter( self.__toklist[::-1] )
462
    def _iterkeys( self ):
463
        if hasattr(self.__tokdict, "iterkeys"):
464
            return self.__tokdict.iterkeys()
465
        else:
466
            return iter(self.__tokdict)
467

468
    def _itervalues( self ):
469
        return (self[k] for k in self._iterkeys())
470
            
471
    def _iteritems( self ):
472
        return ((k, self[k]) for k in self._iterkeys())
473

474
    if PY_3:
        # Python 3.x only: the dict-style accessors are the iterator versions.
        # keys   - iterator of all named result keys
        # values - iterator of all named result values
        # items  - iterator of all named result key-value tuples
        keys = _iterkeys
        values = _itervalues
        items = _iteritems

    else:
        # Python 2.x only: iterator accessors under their iter* names.
        # iterkeys   - iterator of all named result keys
        # itervalues - iterator of all named result values
        # iteritems  - iterator of all named result key-value tuples
        iterkeys = _iterkeys
        itervalues = _itervalues
        iteritems = _iteritems

        def keys( self ):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())
505

506
    def haskeys( self ):
507
        """Since keys() returns an iterator, this method is helpful in bypassing
508
           code that looks for the existence of any defined results names."""
509
        return bool(self.__tokdict)
510
        
511
    def pop( self, *args, **kwargs):
512
        """
513
        Removes and returns item at specified index (default=C{last}).
514
        Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
515
        argument or an integer argument, it will use C{list} semantics
516
        and pop tokens from the list of parsed tokens. If passed a 
517
        non-integer argument (most likely a string), it will use C{dict}
518
        semantics and pop the corresponding value from any defined 
519
        results names. A second default return value argument is 
520
        supported, just as in C{dict.pop()}.
521

522
        Example::
523
            def remove_first(tokens):
524
                tokens.pop(0)
525
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
526
            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
527

528
            label = Word(alphas)
529
            patt = label("LABEL") + OneOrMore(Word(nums))
530
            print(patt.parseString("AAB 123 321").dump())
531

532
            # Use pop() in a parse action to remove named result (note that corresponding value is not
533
            # removed from list form of results)
534
            def remove_LABEL(tokens):
535
                tokens.pop("LABEL")
536
                return tokens
537
            patt.addParseAction(remove_LABEL)
538
            print(patt.parseString("AAB 123 321").dump())
539
        prints::
540
            ['AAB', '123', '321']
541
            - LABEL: AAB
542

543
            ['AAB', '123', '321']
544
        """
545
        if not args:
546
            args = [-1]
547
        for k,v in kwargs.items():
548
            if k == 'default':
549
                args = (args[0], v)
550
            else:
551
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
552
        if (isinstance(args[0], int) or 
553
                        len(args) == 1 or 
554
                        args[0] in self):
555
            index = args[0]
556
            ret = self[index]
557
            del self[index]
558
            return ret
559
        else:
560
            defaultvalue = args[1]
561
            return defaultvalue
562

563
    def get(self, key, defaultValue=None):
564
        """
565
        Returns named result matching the given key, or if there is no
566
        such name, then returns the given C{defaultValue} or C{None} if no
567
        C{defaultValue} is specified.
568

569
        Similar to C{dict.get()}.
570
        
571
        Example::
572
            integer = Word(nums)
573
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
574

575
            result = date_str.parseString("1999/12/31")
576
            print(result.get("year")) # -> '1999'
577
            print(result.get("hour", "not specified")) # -> 'not specified'
578
            print(result.get("hour")) # -> None
579
        """
580
        if key in self:
581
            return self[key]
582
        else:
583
            return defaultValue
584

585
    def insert( self, index, insStr ):
586
        """
587
        Inserts new element at location index in the list of parsed tokens.
588
        
589
        Similar to C{list.insert()}.
590

591
        Example::
592
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
593

594
            # use a parse action to insert the parse location in the front of the parsed results
595
            def insert_locn(locn, tokens):
596
                tokens.insert(0, locn)
597
            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
598
        """
599
        self.__toklist.insert(index, insStr)
600
        # fixup indices in token dictionary
601
        for name,occurrences in self.__tokdict.items():
602
            for k, (value, position) in enumerate(occurrences):
603
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
604

605
    def append( self, item ):
606
        """
607
        Add single element to end of ParseResults list of elements.
608

609
        Example::
610
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
611
            
612
            # use a parse action to compute the sum of the parsed integers, and add it to the end
613
            def append_sum(tokens):
614
                tokens.append(sum(map(int, tokens)))
615
            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
616
        """
617
        self.__toklist.append(item)
618

619
    def extend( self, itemseq ):
620
        """
621
        Add sequence of elements to end of ParseResults list of elements.
622

623
        Example::
624
            patt = OneOrMore(Word(alphas))
625
            
626
            # use a parse action to append the reverse of the matched strings, to make a palindrome
627
            def make_palindrome(tokens):
628
                tokens.extend(reversed([t[::-1] for t in tokens]))
629
                return ''.join(tokens)
630
            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
631
        """
632
        if isinstance(itemseq, ParseResults):
633
            self += itemseq
634
        else:
635
            self.__toklist.extend(itemseq)
636

637
    def clear( self ):
638
        """
639
        Clear all elements and results names.
640
        """
641
        del self.__toklist[:]
642
        self.__tokdict.clear()
643

644
    def __getattr__( self, name ):
645
        try:
646
            return self[name]
647
        except KeyError:
648
            return ""
649
            
650
        if name in self.__tokdict:
651
            if name not in self.__accumNames:
652
                return self.__tokdict[name][-1][0]
653
            else:
654
                return ParseResults([ v[0] for v in self.__tokdict[name] ])
655
        else:
656
            return ""
657

658
    def __add__( self, other ):
659
        ret = self.copy()
660
        ret += other
661
        return ret
662

663
    def __iadd__( self, other ):
664
        if other.__tokdict:
665
            offset = len(self.__toklist)
666
            addoffset = lambda a: offset if a<0 else a+offset
667
            otheritems = other.__tokdict.items()
668
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
669
                                for (k,vlist) in otheritems for v in vlist]
670
            for k,v in otherdictitems:
671
                self[k] = v
672
                if isinstance(v[0],ParseResults):
673
                    v[0].__parent = wkref(self)
674
            
675
        self.__toklist += other.__toklist
676
        self.__accumNames.update( other.__accumNames )
677
        return self
678

679
    def __radd__(self, other):
680
        if isinstance(other,int) and other == 0:
681
            # useful for merging many ParseResults using sum() builtin
682
            return self.copy()
683
        else:
684
            # this may raise a TypeError - so be it
685
            return other + self
686
        
687
    def __repr__( self ):
688
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
689

690
    def __str__( self ):
691
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
692

693
    def _asStringList( self, sep='' ):
        """
        Return the tokens as a flat list of strings, with C{sep} (when non-empty)
        inserted between successive top-level items. Nested ParseResults are
        flattened recursively, without a separator.
        """
        pieces = []
        for tok in self.__toklist:
            if pieces and sep:
                pieces.append(sep)
            if not isinstance( tok, ParseResults ):
                pieces.append( _ustr(tok) )
            else:
                pieces += tok._asStringList()
        return pieces
703

704
    def asList( self ):
705
        """
706
        Returns the parse results as a nested list of matching tokens, all converted to strings.
707

708
        Example::
709
            patt = OneOrMore(Word(alphas))
710
            result = patt.parseString("sldkj lsdkj sldkj")
711
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
712
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
713
            
714
            # Use asList() to create an actual list
715
            result_list = result.asList()
716
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
717
        """
718
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
719

720
    def asDict( self ):
        """
        Returns the named parse results as a nested dictionary.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

            result_dict = result.asDict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
        """
        # pick the iterator-returning accessor for the running Python version
        item_fn = self.items if PY_3 else self.iteritems

        def toItem(obj):
            # plain values pass through; nested results become a dict when they
            # have results names, otherwise a list
            if not isinstance(obj, ParseResults):
                return obj
            if obj.haskeys():
                return obj.asDict()
            return [toItem(v) for v in obj]

        return dict((k,toItem(v)) for k,v in item_fn())
754

755
    def copy( self ):
        """
        Create a new C{ParseResults} built from this object's token list, carrying
        over the results name, the parent reference, a copy of the named-results
        dictionary, and the accumulated names.
        """
        clone = ParseResults( self.__toklist )
        clone.__name = self.__name
        clone.__parent = self.__parent
        clone.__tokdict = self.__tokdict.copy()
        clone.__accumNames.update( self.__accumNames )
        return clone
765

766
    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """
        (Deprecated) Render the parse results as an XML string.  Tokens and nested
        results that have a results name are emitted under that name; unnamed ones
        are emitted as C{<ITEM>} unless C{namedItemsOnly} is set, in which case they
        are left out.

        Parameters:
         - doctag = tag for the enclosing element (default: this object's results name, else "ITEM")
         - namedItemsOnly = if true, skip tokens that have no results name
         - indent = leading indent for the enclosing element
         - formatted = if false, emit no newlines or indentation
        """
        # map token position -> results name
        namedItems = {}
        for name, occurrences in self.__tokdict.items():
            for occ in occurrences:
                namedItems[occ[1]] = name

        nl, nextLevelIndent = "\n", indent + "  "
        if not formatted:
            # collapse all layout when formatting is not wanted
            nl = indent = nextLevelIndent = ""

        if doctag is not None:
            selfTag = doctag
        else:
            selfTag = self.__name
        if not selfTag:
            if namedItemsOnly:
                return ""
            selfTag = "ITEM"

        childNamedOnly = namedItemsOnly and doctag is None
        pieces = [ nl, indent, "<", selfTag, ">" ]

        for pos, tok in enumerate(self.__toklist):
            tag = namedItems.get(pos)
            if isinstance(tok, ParseResults):
                pieces.append(tok.asXML(tag, childNamedOnly, nextLevelIndent, formatted))
                continue
            # individual token: use its results name if it has one
            if not tag:
                if namedItemsOnly:
                    continue
                tag = "ITEM"
            pieces += [ nl, nextLevelIndent, "<", tag, ">",
                        _xml_escape(_ustr(tok)),
                        "</", tag, ">" ]

        pieces += [ nl, indent, "</", selfTag, ">" ]
        return "".join(pieces)
826

827
    def __lookup(self,sub):
        """Return the results name whose stored values include C{sub} (compared by identity), or C{None}."""
        for name, entries in self.__tokdict.items():
            if any(sub is value for value, _ in entries):
                return name
        return None
833

834
    def getName(self):
        r"""
        Return the results name associated with this token expression, or C{None}
        if there is none.  Handy when any of several named alternatives may have
        matched at a given location.

        Example::
            integer = Word(nums)
            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number") 
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = OneOrMore(user_data)
            
            result = user_info.parseString("22 111-22-3333 #221B")
            for item in result:
                print(item.getName(), ':', item[0])
        prints::
            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self.__name:
            return self.__name

        if self.__parent:
            # ask the parent which name this object is stored under
            owner = self.__parent()
            return owner.__lookup(self) if owner else None

        if len(self) == 1 and len(self.__tokdict) == 1:
            # a single token with a single name recorded at offset 0 or -1
            only_name, only_values = next(iter(self.__tokdict.items()))
            if only_values[0][1] in (0,-1):
                return only_name

        return None
870

871
    def dump(self, indent='', depth=0, full=True):
        """
        Diagnostic listing of a C{ParseResults}: the token list on the first line,
        followed (when C{full} is true) by the named results in sorted order, or,
        if there are no names, by any nested C{ParseResults} items.  C{indent} is
        prepended to each line so the output can be embedded in a larger display.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
            
            result = date_str.parseString('12/31/1999')
            print(result.dump())
        prints::
            ['12', '/', '31', '/', '1999']
            - day: 1999
            - month: 31
            - year: 12
        """
        NL = '\n'
        out = [ indent+_ustr(self.asList()) ]
        if not full:
            return "".join(out)

        if self.haskeys():
            named = sorted((str(k), v) for k,v in self.items())
            for name, value in named:
                out.append(NL)
                out.append( "%s%s- %s: " % (indent,('  '*depth), name) )
                if not isinstance(value,ParseResults):
                    out.append(repr(value))
                elif value:
                    out.append( value.dump(indent,depth+1) )
                else:
                    out.append(_ustr(value))
        elif any(isinstance(vv,ParseResults) for vv in self):
            for i,vv in enumerate(self):
                if isinstance(vv,ParseResults):
                    shown = vv.dump(indent,depth+1)
                else:
                    shown = _ustr(vv)
                out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),shown))

        return "".join(out)
915

916
    def pprint(self, *args, **kwargs):
        """
        Pretty-print the results, converted with C{asList()}, via the C{pprint}
        module.  Any positional or keyword arguments are passed straight through
        to C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

        Example::
            ident = Word(alphas, alphanums)
            num = Word(nums)
            func = Forward()
            term = ident | num | Group('(' + func + ')')
            func <<= ident + Group(Optional(delimitedList(term)))
            result = func.parseString("fna a,b,(fnb c,d,200),100")
            result.pprint(width=40)
        prints::
            ['fna',
             ['a',
              'b',
              ['(', 'fnb', ['c', 'd', '200'], ')'],
              '100']]
        """
        plain = self.asList()
        pprint.pprint(plain, *args, **kwargs)
938

939
    # add support for pickle protocol
940
    def __getstate__(self):
        """
        Pickle support: return C{(toklist, (tokdict copy, parent, accumNames, name))}.
        The parent entry is the object obtained by calling the stored parent
        reference, or C{None} when there is no reference or that object is falsy.
        """
        parent_ref = self.__parent
        if parent_ref is not None:
            parent = parent_ref() or None
        else:
            parent = None
        extras = ( self.__tokdict.copy(),
                   parent,
                   self.__accumNames,
                   self.__name )
        return ( self.__toklist, extras )
946

947
    def __setstate__(self,state):
        """
        Pickle support: restore the attributes from a state tuple shaped like the
        one returned by C{__getstate__}.  A non-C{None} parent is wrapped again
        with C{wkref}.
        """
        self.__toklist = state[0]
        tokdict, par, inAccumNames, name = state[1]
        self.__tokdict = tokdict
        self.__name = name

        # rebuild the accumulated names in a fresh dict
        accum = {}
        accum.update(inAccumNames)
        self.__accumNames = accum

        self.__parent = wkref(par) if par is not None else None
959

960
    def __getnewargs__(self):
        """Pickle support: return the tuple (token list, name, asList flag, modal flag)."""
        new_args = (self.__toklist, self.__name, self.__asList, self.__modal)
        return new_args
962

963
    def __dir__(self):
        """List the attribute names of the type followed by this object's results names."""
        names = dir(type(self))
        names.extend(self.keys())
        return names
965

966
# Register ParseResults with the MutableMapping ABC so that isinstance/issubclass
# checks against MutableMapping succeed without ParseResults inheriting from it.
MutableMapping.register(ParseResults)
967

968
def col (loc,strg):
    """Return the 1-based column of C{loc} within C{strg}, treating newlines as
   line separators.

   Note: by default, tabs in the input string are expanded before parsing
   starts.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for
   details on parsing strings containing C{<TAB>}s, and for suggested ways to
   keep the parsed string, the parse location, and the line and column
   positions consistent with each other.
   """
    # shortcut: a location directly after a newline is the first column
    if 0 < loc < len(strg) and strg[loc-1] == '\n':
        return 1
    return loc - strg.rfind("\n", 0, loc)
980

981
def lineno(loc,strg):
    """Return the 1-based line number of C{loc} within C{strg}, treating newlines
   as line separators.

   Note: by default, tabs in the input string are expanded before parsing
   starts.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for
   details on parsing strings containing C{<TAB>}s, and for suggested ways to
   keep the parsed string, the parse location, and the line and column
   positions consistent with each other.
   """
    newlines_before = strg.count("\n",0,loc)
    return newlines_before + 1
992

993
def line( loc, strg ):
    """Return the text of the line containing C{loc} within C{strg} (without its
       trailing newline), treating newlines as line separators.
       """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
1002

1003
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: print the expression and its location (with line and column)."""
    prefix = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print (prefix + position)
1005

1006
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: print the expression and the matched tokens as a list."""
    matched = str(toks.asList())
    print ("Matched " + _ustr(expr) + " -> " + matched)
1008

1009
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run after a failed match: print the exception."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
1011

1012
def nullDebugAction(*args):
    """No-op debug action; accepts any positional arguments and ignores them.  Use it to silence debugging output during parsing."""
    return None
1015

1016
# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
1017
#~ 'decorator to trim function calls to match the arity of the target'
1018
#~ def _trim_arity(func, maxargs=3):
1019
    #~ if func in singleArgBuiltins:
1020
        #~ return lambda s,l,t: func(t)
1021
    #~ limit = 0
1022
    #~ foundArity = False
1023
    #~ def wrapper(*args):
1024
        #~ nonlocal limit,foundArity
1025
        #~ while 1:
1026
            #~ try:
1027
                #~ ret = func(*args[limit:])
1028
                #~ foundArity = True
1029
                #~ return ret
1030
            #~ except TypeError:
1031
                #~ if limit == maxargs or foundArity:
1032
                    #~ raise
1033
                #~ limit += 1
1034
                #~ continue
1035
    #~ return wrapper
1036

1037
# this version is Python 2.x-3.x cross-compatible
1038
'decorator to trim function calls to match the arity of the target'
1039
def _trim_arity(func, maxargs=2):
    """
    Wrap C{func} so that it can always be called with the full argument list,
    whatever number of trailing arguments C{func} actually accepts.

    The returned wrapper first calls C{func} with all arguments; each time that
    call itself raises C{TypeError}, it drops one more leading argument and
    retries, up to C{maxargs}+1 dropped arguments.  Once a call has succeeded
    the number of dropped arguments is kept, and later C{TypeError}s are
    re-raised unchanged.  A C{TypeError} whose traceback does not end at the
    wrapper's own call line is also re-raised, since it did not come from the
    arity probing.

    Functions listed in C{singleArgBuiltins} are simply wrapped to receive the
    third argument only.

    WARNING: the arity probing compares traceback line numbers, so the number
    of lines between the C{this_line = ...} statement and the call to C{func}
    inside C{wrapper} must stay equal to C{LINE_DIFF}.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists so that wrapper can update them (no 'nonlocal' in Python 2)
    limit = [0]
    foundArity = [False]
    
    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb
    
    # synthesize what would be returned by traceback.extract_stack at the call to 
    # user's parse action 'func', so that we don't incur call penalty at parse time
    
    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND 
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the traceback reference once it has been inspected
                        del tb

                # drop one more leading argument and retry, until the limit is exceeded
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__', 
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1102

1103
class ParserElement(object):
1104
    """Abstract base level parser element class."""
1105
    DEFAULT_WHITE_CHARS = " \n\t\r"
1106
    verbose_stacktrace = False
1107

1108
    @staticmethod
1109
    def setDefaultWhitespaceChars( chars ):
1110
        r"""
1111
        Overrides the default whitespace chars
1112

1113
        Example::
1114
            # default whitespace chars are space, <TAB> and newline
1115
            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
1116
            
1117
            # change to just treat newline as significant
1118
            ParserElement.setDefaultWhitespaceChars(" \t")
1119
            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
1120
        """
1121
        ParserElement.DEFAULT_WHITE_CHARS = chars
1122

1123
    @staticmethod
1124
    def inlineLiteralsUsing(cls):
1125
        """
1126
        Set class to be used for inclusion of string literals into a parser.
1127
        
1128
        Example::
1129
            # default literal class used is Literal
1130
            integer = Word(nums)
1131
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
1132

1133
            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
1134

1135

1136
            # change to Suppress
1137
            ParserElement.inlineLiteralsUsing(Suppress)
1138
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
1139

1140
            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
1141
        """
1142
        ParserElement._literalStringClass = cls
1143

1144
    def __init__( self, savelist=False ):
1145
        self.parseAction = list()
1146
        self.failAction = None
1147
        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
1148
        self.strRepr = None
1149
        self.resultsName = None
1150
        self.saveAsList = savelist
1151
        self.skipWhitespace = True
1152
        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1153
        self.copyDefaultWhiteChars = True
1154
        self.mayReturnEmpty = False # used when checking for left-recursion
1155
        self.keepTabs = False
1156
        self.ignoreExprs = list()
1157
        self.debug = False
1158
        self.streamlined = False
1159
        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
1160
        self.errmsg = ""
1161
        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
1162
        self.debugActions = ( None, None, None ) #custom debug actions
1163
        self.re = None
1164
        self.callPreparse = True # used to avoid redundant calls to preParse
1165
        self.callDuringTry = False
1166

1167
    def copy( self ):
1168
        """
1169
        Make a copy of this C{ParserElement}.  Useful for defining different parse actions
1170
        for the same parsing pattern, using copies of the original parse element.
1171
        
1172
        Example::
1173
            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1174
            integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
1175
            integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1176
            
1177
            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1178
        prints::
1179
            [5120, 100, 655360, 268435456]
1180
        Equivalent form of C{expr.copy()} is just C{expr()}::
1181
            integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1182
        """
1183
        cpy = copy.copy( self )
1184
        cpy.parseAction = self.parseAction[:]
1185
        cpy.ignoreExprs = self.ignoreExprs[:]
1186
        if self.copyDefaultWhiteChars:
1187
            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1188
        return cpy
1189

1190
    def setName( self, name ):
1191
        """
1192
        Define name for this expression, makes debugging and exception messages clearer.
1193
        
1194
        Example::
1195
            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
1196
            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
1197
        """
1198
        self.name = name
1199
        self.errmsg = "Expected " + self.name
1200
        if hasattr(self,"exception"):
1201
            self.exception.msg = self.errmsg
1202
        return self
1203

1204
    def setResultsName( self, name, listAllMatches=False ):
1205
        """
1206
        Define name for referencing matching tokens as a nested attribute
1207
        of the returned parse results.
1208
        NOTE: this returns a *copy* of the original C{ParserElement} object;
1209
        this is so that the client can define a basic element, such as an
1210
        integer, and reference it in multiple places with different names.
1211

1212
        You can also set results names using the abbreviated syntax,
1213
        C{expr("name")} in place of C{expr.setResultsName("name")} - 
1214
        see L{I{__call__}<__call__>}.
1215

1216
        Example::
1217
            date_str = (integer.setResultsName("year") + '/' 
1218
                        + integer.setResultsName("month") + '/' 
1219
                        + integer.setResultsName("day"))
1220

1221
            # equivalent form:
1222
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1223
        """
1224
        newself = self.copy()
1225
        if name.endswith("*"):
1226
            name = name[:-1]
1227
            listAllMatches=True
1228
        newself.resultsName = name
1229
        newself.modalResults = not listAllMatches
1230
        return newself
1231

1232
    def setBreak(self,breakFlag = True):
1233
        """Method to invoke the Python pdb debugger when this element is
1234
           about to be parsed. Set C{breakFlag} to True to enable, False to
1235
           disable.
1236
        """
1237
        if breakFlag:
1238
            _parseMethod = self._parse
1239
            def breaker(instring, loc, doActions=True, callPreParse=True):
1240
                import pdb
1241
                pdb.set_trace()
1242
                return _parseMethod( instring, loc, doActions, callPreParse )
1243
            breaker._originalParseMethod = _parseMethod
1244
            self._parse = breaker
1245
        else:
1246
            if hasattr(self._parse,"_originalParseMethod"):
1247
                self._parse = self._parse._originalParseMethod
1248
        return self
1249

1250
    def setParseAction( self, *fns, **kwargs ):
        """
        Replace this element's parse actions with C{fns}, the actions to run when
        the element matches successfully.  Returns C{self}.
        Each parse action is a callable taking 0-3 arguments, called as
        C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
         - s   = the original string being parsed (see note below)
         - loc = the location of the matching substring
         - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
        A parse action that modifies the tokens can return them, and the returned
        value replaces the original tokens.  Otherwise it need not return anything.

        Optional keyword arguments:
         - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

        Note: by default, tabs in the input string are expanded before parsing
        starts.  See L{I{parseString}<parseString>} for details on parsing strings
        containing C{<TAB>}s, and for suggested ways to keep the parsed string, the
        parse location, and the line and column positions consistent.
        
        Example::
            integer = Word(nums)
            date_str = integer + '/' + integer + '/' + integer

            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

            # use parse action to convert to ints at parse time
            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
            date_str = integer + '/' + integer + '/' + integer

            # note that integer fields are now ints, not strings
            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
        """
        self.parseAction = [_trim_arity(fn) for fn in fns]
        self.callDuringTry = kwargs.get("callDuringTry", False)
        return self
1287

1288
    def addParseAction( self, *fns, **kwargs ):
        """
        Append C{fns} to this expression's existing parse actions; see
        L{I{setParseAction}<setParseAction>} for the accepted call signatures.
        C{callDuringTry} stays set once it has been enabled.  Returns C{self}.
        
        See examples in L{I{copy}<copy>}.
        """
        self.parseAction.extend(_trim_arity(fn) for fn in fns)
        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
        return self
1297

1298
    def addCondition(self, *fns, **kwargs):
        """Add one or more boolean predicate functions to expression's list of parse actions. See 
        L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction}, 
        functions passed to C{addCondition} need to return boolean success/fail of the condition.
        Each function in C{fns} becomes its own parse action and is checked independently.

        Optional keyword arguments:
         - message = define a custom message to be used in the raised exception
         - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
         - callDuringTry = if True, conditions are also run during lookaheads and alternate testing

        Returns C{self}.
         
        Example::
            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
            year_int = integer.copy()
            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
            date_str = year_int + '/' + integer + '/' + integer

            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
        """
        msg = kwargs.get("message", "failed user-defined condition")
        exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

        def makeConditionAction(fn):
            # Bind fn through a function argument: a closure created directly in
            # the loop below would see only the loop variable's final value, so
            # every condition would end up testing the last function in fns.
            # Wrapping with _trim_arity once here also avoids re-probing the
            # function's arity on every invocation.
            fn = _trim_arity(fn)
            def pa(s,l,t):
                if not bool(fn(s,l,t)):
                    raise exc_type(s,l,msg)
            return pa

        for fn in fns:
            self.parseAction.append(makeConditionAction(fn))
        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
        return self
1324

1325
    def setFailAction( self, fn ):
1326
        """Define action to perform if parsing fails at this expression.
1327
           Fail acton fn is a callable function that takes the arguments
1328
           C{fn(s,loc,expr,err)} where:
1329
            - s = string being parsed
1330
            - loc = location where expression match was attempted and failed
1331
            - expr = the parse expression that failed
1332
            - err = the exception thrown
1333
           The function returns no value.  It may throw C{L{ParseFatalException}}
1334
           if it is desired to stop parsing immediately."""
1335
        self.failAction = fn
1336
        return self
1337

1338
    def _skipIgnorables( self, instring, loc ):
1339
        exprsFound = True
1340
        while exprsFound:
1341
            exprsFound = False
1342
            for e in self.ignoreExprs:
1343
                try:
1344
                    while 1:
1345
                        loc,dummy = e._parse( instring, loc )
1346
                        exprsFound = True
1347
                except ParseException:
1348
                    pass
1349
        return loc
1350

1351
    def preParse( self, instring, loc ):
1352
        if self.ignoreExprs:
1353
            loc = self._skipIgnorables( instring, loc )
1354

1355
        if self.skipWhitespace:
1356
            wt = self.whiteChars
1357
            instrlen = len(instring)
1358
            while loc < instrlen and instring[loc] in wt:
1359
                loc += 1
1360

1361
        return loc
1362

1363
    def parseImpl( self, instring, loc, doActions=True ):
        """Base implementation of the match step: consume nothing and return C{loc} with an empty token list."""
        no_tokens = []
        return loc, no_tokens
1365

1366
    def postParse( self, instring, loc, tokenlist ):
        """Base implementation of the post-match hook: return C{tokenlist} unchanged."""
        return tokenlist
1368

1369
    #~ @profile
1370
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """
        Attempt to match this element against C{instring} at C{loc}.

        Steps, in order: optional C{preParse}, C{parseImpl}, C{postParse},
        wrapping of the tokens in a C{ParseResults}, then the parse actions
        (run only when C{doActions} or C{self.callDuringTry} is true).

        Parameters:
         - instring = the string being parsed
         - loc = location at which to attempt the match
         - doActions = if false, parse actions run only when C{callDuringTry} is set
         - callPreParse = if false (or if C{self.callPreparse} is false), C{preParse} is skipped

        Returns a tuple C{(loc, retTokens)}: the location returned by
        C{parseImpl} and the resulting C{ParseResults}.

        An C{IndexError} raised by C{parseImpl} is converted to a
        C{ParseException} located at the end of C{instring}, except on the fast
        path taken when C{mayIndexError} is false and the start location lies
        inside the string.
        """
        debugging = ( self.debug ) #and doActions )

        # slow path: debug actions and/or a fail action have to be notified
        if debugging or self.failAction:
            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            # location reported to parse actions and to debug/fail actions
            tokensStart = preloc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseBaseException as err:
                #~ print ("Exception raised:", err)
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            # only pay for the IndexError guard when this element may raise one,
            # or when the start location is already at or past the end of the string
            if self.mayIndexError or preloc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and (doActions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        # a parse action that returns a value replaces the current results
                        if tokens is not None:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseBaseException as err:
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                # same loop as above, without the debug exception hook
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
        if debugging:
            #~ print ("Matched",self,"->",retTokens.asList())
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
1440

1441
    def tryParse( self, instring, loc ):
        """
        Attempt a match at C{loc} with parse actions disabled (C{doActions=False}).

        Returns the first item of the C{_parse} result (the resulting location).
        A C{ParseFatalException} raised while matching is replaced by an ordinary
        C{ParseException} built from this element's C{errmsg}.
        """
        try:
            parsed = self._parse( instring, loc, doActions=False )
            return parsed[0]
        except ParseFatalException:
            raise ParseException( instring, loc, self.errmsg, self )
1446
    
1447
    def canParseNext(self, instring, loc):
        """
        Report whether this expression would match at C{loc}.

        Runs C{tryParse} and returns C{False} if it raises C{ParseException} or
        C{IndexError}, C{True} otherwise.
        """
        try:
            self.tryParse(instring, loc)
        except (ParseException, IndexError):
            return False
        return True
1454

1455
    class _UnboundedCache(object):
1456
        def __init__(self):
1457
            cache = {}
1458
            self.not_in_cache = not_in_cache = object()
1459

1460
            def get(self, key):
1461
                return cache.get(key, not_in_cache)
1462

1463
            def set(self, key, value):
1464
                cache[key] = value
1465

1466
            def clear(self):
1467
                cache.clear()
1468
                
1469
            def cache_len(self):
1470
                return len(cache)
1471

1472
            self.get = types.MethodType(get, self)
1473
            self.set = types.MethodType(set, self)
1474
            self.clear = types.MethodType(clear, self)
1475
            self.__len__ = types.MethodType(cache_len, self)
1476

1477
    if _OrderedDict is not None:
1478
        class _FifoCache(object):
1479
            def __init__(self, size):
1480
                self.not_in_cache = not_in_cache = object()
1481

1482
                cache = _OrderedDict()
1483

1484
                def get(self, key):
1485
                    return cache.get(key, not_in_cache)
1486

1487
                def set(self, key, value):
1488
                    cache[key] = value
1489
                    while len(cache) > size:
1490
                        try:
1491
                            cache.popitem(False)
1492
                        except KeyError:
1493
                            pass
1494

1495
                def clear(self):
1496
                    cache.clear()
1497

1498
                def cache_len(self):
1499
                    return len(cache)
1500

1501
                self.get = types.MethodType(get, self)
1502
                self.set = types.MethodType(set, self)
1503
                self.clear = types.MethodType(clear, self)
1504
                self.__len__ = types.MethodType(cache_len, self)
1505

1506
    else:
1507
        class _FifoCache(object):
1508
            def __init__(self, size):
1509
                self.not_in_cache = not_in_cache = object()
1510

1511
                cache = {}
1512
                key_fifo = collections.deque([], size)
1513

1514
                def get(self, key):
1515
                    return cache.get(key, not_in_cache)
1516

1517
                def set(self, key, value):
1518
                    cache[key] = value
1519
                    while len(key_fifo) > size:
1520
                        cache.pop(key_fifo.popleft(), None)
1521
                    key_fifo.append(key)
1522

1523
                def clear(self):
1524
                    cache.clear()
1525
                    key_fifo.clear()
1526

1527
                def cache_len(self):
1528
                    return len(cache)
1529

1530
                self.get = types.MethodType(get, self)
1531
                self.set = types.MethodType(set, self)
1532
                self.clear = types.MethodType(clear, self)
1533
                self.__len__ = types.MethodType(cache_len, self)
1534

1535
    # Packrat (memoizing) state shared by every ParserElement.
    # packrat_cache starts out as a plain dict so that resetCache() works before
    # enablePackrat() has installed a real cache object.
    packrat_cache = {}
    packrat_cache_lock = RLock()
    packrat_cache_stats = [0, 0]   # [hits, misses], indexed by HIT/MISS in _parseCache

    # During backtracking the same expression is often tried again at the same
    # location with the same arguments; remembering the outcome avoids
    # re-parsing the contained expression.
    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
        """
        Memoizing wrapper around C{_parseNoCache}.

        Results and C{ParseBaseException}s are stored under the key
        C{(self, instring, loc, callPreParse, doActions)}.  Token results are
        copied both when stored and when served from the cache.  All cache access,
        including the underlying parse on a miss, happens while holding
        C{packrat_cache_lock}.
        """
        HIT, MISS = 0, 1
        key = (self, instring, loc, callPreParse, doActions)
        with ParserElement.packrat_cache_lock:
            cache = ParserElement.packrat_cache
            cached = cache.get(key)

            if cached is not cache.not_in_cache:
                ParserElement.packrat_cache_stats[HIT] += 1
                if isinstance(cached, Exception):
                    raise cached
                return (cached[0], cached[1].copy())

            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                result = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(key, pe.__class__(*pe.args))
                raise
            cache.set(key, (result[0], result[1].copy()))
            return result

    # Uncached parsing is the default; enablePackrat() rebinds this to _parseCache.
    _parse = _parseNoCache
1566

1567
    @staticmethod
1568
    def resetCache():
1569
        ParserElement.packrat_cache.clear()
1570
        ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
1571

1572
    _packratEnabled = False
1573
    @staticmethod
1574
    def enablePackrat(cache_size_limit=128):
1575
        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1576
           Repeated parse attempts at the same string location (which happens
1577
           often in many complex grammars) can immediately return a cached value,
1578
           instead of re-executing parsing/validating code.  Memoizing is done of
1579
           both valid results and parsing exceptions.
1580
           
1581
           Parameters:
1582
            - cache_size_limit - (default=C{128}) - if an integer value is provided
1583
              will limit the size of the packrat cache; if None is passed, then
1584
              the cache size will be unbounded; if 0 is passed, the cache will
1585
              be effectively disabled.
1586
            
1587
           This speedup may break existing programs that use parse actions that
1588
           have side-effects.  For this reason, packrat parsing is disabled when
1589
           you first import pyparsing.  To activate the packrat feature, your
1590
           program must call the class method C{ParserElement.enablePackrat()}.  If
1591
           your program uses C{psyco} to "compile as you go", you must call
1592
           C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
1593
           Python will crash.  For best results, call C{enablePackrat()} immediately
1594
           after importing pyparsing.
1595
           
1596
           Example::
1597
               import pyparsing
1598
               pyparsing.ParserElement.enablePackrat()
1599
        """
1600
        if not ParserElement._packratEnabled:
1601
            ParserElement._packratEnabled = True
1602
            if cache_size_limit is None:
1603
                ParserElement.packrat_cache = ParserElement._UnboundedCache()
1604
            else:
1605
                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
1606
            ParserElement._parse = ParserElement._parseCache
1607

1608
    def parseString( self, instring, parseAll=False ):
        """
        Run this parse expression against C{instring}, starting at position 0,
        and return the matched tokens.  This is the main entry point for client
        code once the complete expression has been built.

        Pass C{parseAll=True} to require that the whole input string be
        consumed (equivalent to ending the grammar with C{L{StringEnd()}}).

        Note: unless C{keepTabs} is set, C{parseString} calls C{expandtabs()} on
        the input string so that column numbers reported to parse actions are
        correct.  If the input contains tabs and your parse actions use the
        C{loc} argument to index into the string being parsed, keep a consistent
        view of the input by doing one of the following:
         - call C{parseWithTabs} on your grammar before calling C{parseString}
           (see L{I{parseWithTabs}<parseWithTabs>})
         - define your parse action using the full C{(s,loc,toks)} signature, and
           reference the input string using the parse action's C{s} argument
         - explicitly expand the tabs in your input string before calling
           C{parseString}

        Example::
            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
        """
        ParserElement.resetCache()
        if not self.streamlined:
            self.streamline()
        for ignored in self.ignoreExprs:
            ignored.streamline()
        if not self.keepTabs:
            instring = instring.expandtabs()

        try:
            loc, tokens = self._parse( instring, 0 )
            if parseAll:
                # skip trailing whitespace/ignorables, then insist on end of input
                loc = self.preParse( instring, loc )
                endOfInput = Empty() + StringEnd()
                endOfInput._parse( instring, loc )
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        return tokens
1657

1658
    def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
        """
        Generator that scans C{instring} for matches of this expression, yielding
        a C{(tokens, start, end)} tuple for each one.  The optional C{maxMatches}
        argument stops the scan after that many matches.  If C{overlap} is
        specified, overlapping matches are reported as well.

        Start and end locations are relative to the string being parsed.  See
        L{I{parseString}<parseString>} for more information on parsing strings
        with embedded tabs.

        Example::
            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
            print(source)
            for tokens,start,end in Word(alphas).scanString(source):
                print(' '*start + '^'*(end-start))
                print(' '*start + tokens[0])

        prints::

            sldjf123lsdjjkf345sldkjf879lkjsfd987
            ^^^^^
            sldjf
                    ^^^^^^^
                    lsdjjkf
                              ^^^^^^
                              sldkjf
                                       ^^^^^^
                                       lkjsfd
        """
        if not self.streamlined:
            self.streamline()
        for ignored in self.ignoreExprs:
            ignored.streamline()

        if not self.keepTabs:
            instring = _ustr(instring).expandtabs()
        instrlen = len(instring)
        loc = 0
        # bind the bound methods once, outside the scanning loop
        preparseFn = self.preParse
        parseFn = self._parse
        ParserElement.resetCache()
        matches = 0
        try:
            while loc <= instrlen and matches < maxMatches:
                try:
                    preloc = preparseFn( instring, loc )
                    nextLoc, tokens = parseFn( instring, preloc, callPreParse=False )
                except ParseException:
                    # no match here - retry one character past the pre-parsed location
                    loc = preloc + 1
                    continue

                if nextLoc <= loc:
                    # match did not advance past loc; step forward without reporting it
                    loc = preloc + 1
                    continue

                matches += 1
                yield tokens, preloc, nextLoc

                if not overlap:
                    loc = nextLoc
                elif preparseFn( instring, loc ) > loc:
                    loc = nextLoc
                else:
                    loc += 1
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1728

1729
    def transformString( self, instring ):
        """
        Extension to C{L{scanString}} that rewrites the matched text using the
        (possibly modified) tokens returned by parse actions.  To use
        C{transformString}, define a grammar and attach a parse action that
        modifies the returned token list.  Calling C{transformString()} on a
        target string then scans for matches, replaces each matched span with its
        tokens, and returns the resulting string.

        Note that this sets C{keepTabs} to C{True} on the expression.

        Example::
            wd = Word(alphas)
            wd.setParseAction(lambda toks: toks[0].title())

            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
        Prints::
            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
        """
        pieces = []
        prevEnd = 0
        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
        # keep string locs straight between transformString and scanString
        self.keepTabs = True
        try:
            for toks, start, end in self.scanString( instring ):
                # untouched text between the previous match and this one
                pieces.append( instring[prevEnd:start] )
                if toks:
                    if isinstance(toks, ParseResults):
                        pieces.extend( toks.asList() )
                    elif isinstance(toks, list):
                        pieces.extend( toks )
                    else:
                        pieces.append( toks )
                prevEnd = end
            pieces.append( instring[prevEnd:] )
            nonEmpty = [piece for piece in pieces if piece]
            return "".join(map(_ustr, _flatten(nonEmpty)))
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1771

1772
    def searchString( self, instring, maxMatches=_MAX_INT ):
        """
        Another extension to C{L{scanString}}: collects just the tokens of every
        match into a single C{ParseResults}, discarding the match locations.
        The optional C{maxMatches} argument stops the search after that many
        matches.

        Example::
            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
            cap_word = Word(alphas.upper(), alphas.lower())

            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

            # the sum() builtin can be used to merge results into a single ParseResults object
            print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
        prints::
            [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
            ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
        """
        try:
            found = [ toks for toks, _start, _end in self.scanString( instring, maxMatches ) ]
            return ParseResults( found )
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1798

1799
    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
        """
        Generator method to split a string using the given expression as a separator.
        May be called with optional C{maxsplit} argument, to limit the number of splits;
        and the optional C{includeSeparators} argument (default=C{False}), if the separating
        matching text should be included in the split results.

        When C{includeSeparators} is set, only the first token of each separator
        match is yielded.  The text after the last separator is always yielded,
        even if it is empty.

        Example::
            punc = oneOf(list(".,;:/-!?"))
            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
        prints::
            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
        """
        last = 0
        for t,s,e in self.scanString(instring, maxMatches=maxsplit):
            # text between the end of the previous separator and the start of this one
            yield instring[last:s]
            if includeSeparators:
                yield t[0]
            last = e
        yield instring[last:]
1820

1821
    def __add__(self, other ):
        """
        Implementation of + operator - returns C{L{And}}. A string operand is
        first converted using C{_literalStringClass} (L{Literal} by default).
        Any other non-C{ParserElement} operand triggers a C{SyntaxWarning} and
        the result is C{None}.

        Example::
            greet = Word(alphas) + "," + Word(alphas) + "!"
            hello = "Hello, World!"
            print (hello, "->", greet.parseString(hello))
        Prints::
            Hello, World! -> ['Hello', ',', 'World', '!']
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if isinstance( other, ParserElement ):
            return And( [ self, other ] )
        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                SyntaxWarning, stacklevel=2)
        return None
1840

1841
    def __radd__(self, other ):
        """
        Implementation of + operator when left operand is not a C{L{ParserElement}}.
        A string operand is converted with C{_literalStringClass}; anything else
        that is not a C{ParserElement} triggers a C{SyntaxWarning} and yields C{None}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if isinstance( other, ParserElement ):
            return other + self
        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                SyntaxWarning, stacklevel=2)
        return None
1852

1853
    def __sub__(self, other):
        """
        Implementation of - operator, returns C{L{And}} with error stop
        (an C{And._ErrorStop()} is inserted between the two operands).
        A string operand is converted with C{_literalStringClass}; anything else
        that is not a C{ParserElement} triggers a C{SyntaxWarning} and yields C{None}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if isinstance( other, ParserElement ):
            return self + And._ErrorStop() + other
        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                SyntaxWarning, stacklevel=2)
        return None
1864

1865
    def __rsub__(self, other ):
        """
        Implementation of - operator when left operand is not a C{L{ParserElement}}.
        A string operand is converted with C{_literalStringClass}; anything else
        that is not a C{ParserElement} triggers a C{SyntaxWarning} and yields C{None}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if isinstance( other, ParserElement ):
            return other - self
        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                SyntaxWarning, stacklevel=2)
        return None
1876

1877
    def __mul__(self,other):
1878
        """
1879
        Implementation of * operator, allows use of C{expr * 3} in place of
1880
        C{expr + expr + expr}.  Expressions may also me multiplied by a 2-integer
1881
        tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
1882
        may also include C{None} as in:
1883
         - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1884
              to C{expr*n + L{ZeroOrMore}(expr)}
1885
              (read as "at least n instances of C{expr}")
1886
         - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1887
              (read as "0 to n instances of C{expr}")
1888
         - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1889
         - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1890

1891
        Note that C{expr*(None,n)} does not raise an exception if
1892
        more than n exprs exist in the input stream; that is,
1893
        C{expr*(None,n)} does not enforce a maximum number of expr
1894
        occurrences.  If this behavior is desired, then write
1895
        C{expr*(None,n) + ~expr}
1896
        """
1897
        if isinstance(other,int):
1898
            minElements, optElements = other,0
1899
        elif isinstance(other,tuple):
1900
            other = (other + (None, None))[:2]
1901
            if other[0] is None:
1902
                other = (0, other[1])
1903
            if isinstance(other[0],int) and other[1] is None:
1904
                if other[0] == 0:
1905
                    return ZeroOrMore(self)
1906
                if other[0] == 1:
1907
                    return OneOrMore(self)
1908
                else:
1909
                    return self*other[0] + ZeroOrMore(self)
1910
            elif isinstance(other[0],int) and isinstance(other[1],int):
1911
                minElements, optElements = other
1912
                optElements -= minElements
1913
            else:
1914
                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1915
        else:
1916
            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1917

1918
        if minElements < 0:
1919
            raise ValueError("cannot multiply ParserElement by negative value")
1920
        if optElements < 0:
1921
            raise ValueError("second tuple value must be greater or equal to first tuple value")
1922
        if minElements == optElements == 0:
1923
            raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1924

1925
        if (optElements):
1926
            def makeOptionalList(n):
1927
                if n>1:
1928
                    return Optional(self + makeOptionalList(n-1))
1929
                else:
1930
                    return Optional(self)
1931
            if minElements:
1932
                if minElements == 1:
1933
                    ret = self + makeOptionalList(optElements)
1934
                else:
1935
                    ret = And([self]*minElements) + makeOptionalList(optElements)
1936
            else:
1937
                ret = makeOptionalList(optElements)
1938
        else:
1939
            if minElements == 1:
1940
                ret = self
1941
            else:
1942
                ret = And([self]*minElements)
1943
        return ret
1944

1945
    def __rmul__(self, other):
1946
        return self.__mul__(other)
1947

1948
    def __or__(self, other ):
        """
        Implementation of | operator - returns C{L{MatchFirst}}.
        A string operand is converted with C{_literalStringClass}; anything else
        that is not a C{ParserElement} triggers a C{SyntaxWarning} and yields C{None}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if isinstance( other, ParserElement ):
            return MatchFirst( [ self, other ] )
        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                SyntaxWarning, stacklevel=2)
        return None
1959

1960
    def __ror__(self, other ):
        """
        Implementation of | operator when left operand is not a C{L{ParserElement}}.
        A string operand is converted with C{_literalStringClass}; anything else
        that is not a C{ParserElement} triggers a C{SyntaxWarning} and yields C{None}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if isinstance( other, ParserElement ):
            return other | self
        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                SyntaxWarning, stacklevel=2)
        return None
1971

1972
    def __xor__(self, other ):
        """
        Implementation of ^ operator - returns C{L{Or}}.
        A string operand is converted with C{_literalStringClass}; anything else
        that is not a C{ParserElement} triggers a C{SyntaxWarning} and yields C{None}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if isinstance( other, ParserElement ):
            return Or( [ self, other ] )
        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                SyntaxWarning, stacklevel=2)
        return None
1983

1984
    def __rxor__(self, other ):
        """
        Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
        A string operand is converted with C{_literalStringClass}; anything else
        that is not a C{ParserElement} triggers a C{SyntaxWarning} and yields C{None}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if isinstance( other, ParserElement ):
            return other ^ self
        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                SyntaxWarning, stacklevel=2)
        return None
1995

1996
    def __and__(self, other ):
        """
        Implementation of & operator - returns C{L{Each}}.
        A string operand is converted with C{_literalStringClass}; anything else
        that is not a C{ParserElement} triggers a C{SyntaxWarning} and yields C{None}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if isinstance( other, ParserElement ):
            return Each( [ self, other ] )
        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                SyntaxWarning, stacklevel=2)
        return None
2007

2008
    def __rand__(self, other ):
        """
        Implementation of & operator when left operand is not a C{L{ParserElement}}.
        A string operand is converted with C{_literalStringClass}; anything else
        that is not a C{ParserElement} triggers a C{SyntaxWarning} and yields C{None}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        if isinstance( other, ParserElement ):
            return other & self
        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                SyntaxWarning, stacklevel=2)
        return None
2019

2020
    def __invert__( self ):
        """
        Implementation of ~ operator - wraps this expression in a C{L{NotAny}}
        and returns it.
        """
        negated = NotAny( self )
        return negated
2025

2026
    def __call__(self, name=None):
2027
        """
2028
        Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
2029
        
2030
        If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
2031
        passed as C{True}.
2032
           
2033
        If C{name} is omitted, same as calling C{L{copy}}.
2034

2035
        Example::
2036
            # these are equivalent
2037
            userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
2038
            userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")             
2039
        """
2040
        if name is not None:
2041
            return self.setResultsName(name)
2042
        else:
2043
            return self.copy()
2044

2045
    def suppress( self ):
        """
        Returns this C{ParserElement} wrapped in a C{Suppress}, so that its
        output is suppressed; useful to keep punctuation from cluttering up
        returned output.
        """
        wrapped = Suppress( self )
        return wrapped
2051

2052
    def leaveWhitespace( self ):
        """
        Turns off whitespace skipping (sets C{skipWhitespace} to C{False}) before
        matching the characters in the C{ParserElement}'s defined pattern, and
        returns C{self}.  This is normally only used internally by the pyparsing
        module, but may be needed in some whitespace-sensitive grammars.
        """
        setattr(self, "skipWhitespace", False)
        return self
2060

2061
    def setWhitespaceChars( self, chars ):
        """
        Overrides the default whitespace chars for this element: enables
        whitespace skipping, stores C{chars} as C{whiteChars}, and clears
        C{copyDefaultWhiteChars}.  Returns C{self}.
        """
        self.skipWhitespace, self.whiteChars, self.copyDefaultWhiteChars = True, chars, False
        return self
2069

2070
    def parseWithTabs( self ):
        """
        Overrides the default behavior of expanding C{<TAB>}s to spaces before
        parsing the input string: sets C{keepTabs} so the input is left as-is.
        Must be called before C{parseString} when the input grammar contains
        elements that match C{<TAB>} characters.  Returns C{self}.
        """
        setattr(self, "keepTabs", True)
        return self
2078

2079
    def ignore( self, other ):
        """
        Register an expression to be ignored (e.g., comments) while doing
        pattern matching; may be called repeatedly, to define multiple comment
        or other ignorable patterns.  Returns C{self}.

        A string is wrapped in C{Suppress}.  An expression that is already a
        C{Suppress} is added only if not already registered; any other
        expression is copied and wrapped in C{Suppress} before being added.

        Example::
            patt = OneOrMore(Word(alphas))
            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

            patt.ignore(cStyleComment)
            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
        """
        if isinstance(other, basestring):
            other = Suppress(other)

        if not isinstance( other, Suppress ):
            self.ignoreExprs.append( Suppress( other.copy() ) )
        elif other not in self.ignoreExprs:
            self.ignoreExprs.append(other)
        return self
2101

2102
    def setDebugActions( self, startAction, successAction, exceptionAction ):
        """
        Enable display of debugging messages while doing pattern matching,
        using the given callbacks.  Any callback passed as a falsy value
        (e.g. C{None}) is replaced by the corresponding default debug action.
        Sets C{debug} to C{True} and returns C{self}.
        """
        onStart = startAction if startAction else _defaultStartDebugAction
        onSuccess = successAction if successAction else _defaultSuccessDebugAction
        onException = exceptionAction if exceptionAction else _defaultExceptionDebugAction
        self.debugActions = (onStart, onSuccess, onException)
        self.debug = True
        return self
2111

2112
    def setDebug( self, flag=True ):
        """
        Enable display of debugging messages while doing pattern matching.
        Set C{flag} to True to enable (installing the default debug actions),
        False to disable.  Returns C{self}.

        Example::
            wd = Word(alphas).setName("alphaword")
            integer = Word(nums).setName("numword")
            term = wd | integer

            # turn on debugging for wd
            wd.setDebug()

            OneOrMore(term).parseString("abc 123 xyz 890")

        prints::
            Match alphaword at loc 0(1,1)
            Matched alphaword -> ['abc']
            Match alphaword at loc 3(1,4)
            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
            Match alphaword at loc 7(1,8)
            Matched alphaword -> ['xyz']
            Match alphaword at loc 11(1,12)
            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
            Match alphaword at loc 15(1,16)
            Exception raised:Expected alphaword (at char 15), (line:1, col:16)

        The output shown is that produced by the default debug actions - custom debug actions can be
        specified using L{setDebugActions}. Prior to attempting
        to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
        is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
        message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
        which makes debugging and exception messages easier to understand - for instance, the default
        name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
        """
        if not flag:
            self.debug = False
            return self
        self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
        return self
2152

2153
    def __str__( self ):
2154
        return self.name
2155

2156
    def __repr__(self):
        """Return the same text as the string conversion of this expression."""
        text = _ustr(self)
        return text
2158

2159
    def streamline(self):
        """
        Mark this expression as streamlined and drop any cached string
        representation.  Returns C{self}.
        """
        self.strRepr = None
        self.streamlined = True
        return self
2163

2164
    def checkRecursion(self, parseElementList):
        """No-op in this class: nothing is checked and C{None} is returned."""
        return None
2166

2167
    def validate(self, validateTrace=[]):
        """
        Check defined expressions for valid structure, check for infinite recursive definitions.

        This implementation never reads C{validateTrace}; it only runs
        C{checkRecursion} starting from a fresh, empty element list.
        """
        visited = []
        self.checkRecursion(visited)
2172

2173
    def parseFile(self, file_or_filename, parseAll=False):
        """
        Execute the parse expression on the given file or filename.
        If a filename is specified (instead of a file object),
        the entire file is opened, read, and closed before parsing.

        The text is handed to C{parseString} together with C{parseAll}, and that
        call's result is returned.
        """
        try:
            text = file_or_filename.read()
        except AttributeError:
            # no usable read() - treat the argument as a path
            with open(file_or_filename, "r") as handle:
                text = handle.read()

        try:
            parsed = self.parseString(text, parseAll)
        except ParseBaseException as exc:
            if not ParserElement.verbose_stacktrace:
                # catch and re-raise exception from here, clears out pyparsing internal stack trace
                raise exc
            raise
        return parsed
2192

2193
    def __eq__(self, other):
        """
        Compare this expression with C{other}.

         - another C{ParserElement}: equal when it is the same object or when both
           have equal instance attributes (C{vars})
         - a string: equal when this expression matches the string (see L{matches})
         - anything else: the result of comparing a C{super} proxy of this object with C{other}
        """
        if isinstance(other, ParserElement):
            if self is other:
                return True
            return vars(self) == vars(other)
        if isinstance(other, basestring):
            return self.matches(other)
        return super(ParserElement, self) == other
2200

2201
    def __ne__(self,other):
2202
        return not (self == other)
2203

2204
    def __hash__(self):
2205
        return hash(id(self))
2206

2207
    def __req__(self,other):
2208
        return self == other
2209

2210
    def __rne__(self,other):
2211
        return not (self == other)
2212

2213
    def matches(self, testString, parseAll=True):
        """
        Method for quick testing of a parser against a test string. Good for simple
        inline microtests of sub expressions while building up larger parser.

        Parameters:
         - testString - to test against this expression for a match
         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

        Returns C{True} if C{parseString} completes, C{False} if it raises a
        C{ParseBaseException}.

        Example::
            expr = Word(nums)
            assert expr.matches("100")
        """
        try:
            self.parseString(_ustr(testString), parseAll=parseAll)
        except ParseBaseException:
            return False
        else:
            return True
2231
                
2232
    def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
        """
        Execute the parse expression on a series of test strings, showing each
        test, the parsed results or where the parse failed. Quick and easy way to
        run a parse expression against a list of sample strings.

        Parameters:
         - tests - a list of separate test strings, or a multiline string of test strings
         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
         - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
              string; pass None to disable comment filtering
         - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
              if False, only dump nested list
         - printResults - (default=C{True}) prints test output to stdout
         - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

        Returns: a (success, results) tuple, where success indicates that all tests succeeded
        (or failed if C{failureTests} is True), and the results contain a list of
        C{(test string, parse results or exception)} pairs, one per executed test

        Example::
            number_expr = pyparsing_common.number.copy()

            result = number_expr.runTests('''
                # unsigned integer
                100
                # negative integer
                -100
                # float with scientific notation
                6.02e23
                # integer with scientific notation
                1e-12
                ''')
            print("Success" if result[0] else "Failed!")

            result = number_expr.runTests('''
                # stray character
                100Z
                # missing leading digit before '.'
                -.100
                # too many '.'
                3.14.159
                ''', failureTests=True)
            print("Success" if result[0] else "Failed!")
        prints::
            # unsigned integer
            100
            [100]

            # negative integer
            -100
            [-100]

            # float with scientific notation
            6.02e23
            [6.02e+23]

            # integer with scientific notation
            1e-12
            [1e-12]

            Success

            # stray character
            100Z
               ^
            FAIL: Expected end of text (at char 3), (line:1, col:4)

            # missing leading digit before '.'
            -.100
            ^
            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

            # too many '.'
            3.14.159
                ^
            FAIL: Expected end of text (at char 4), (line:1, col:5)

            Success

        Each test string must be on a single line. If you want to test a string that spans multiple
        lines, create a test like this::

            expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

        (Note that this is a raw string literal, you must include the leading 'r'.)
        """
        # normalise the inputs: one stripped test per line, comment marker as an expression
        if isinstance(tests, basestring):
            tests = list(map(str.strip, tests.rstrip().splitlines()))
        if isinstance(comment, basestring):
            comment = Literal(comment)

        success = True
        allResults = []
        comments = []
        for t in tests:
            is_comment_line = comment is not None and comment.matches(t, False)
            # blank lines are kept with a comment group that has already started
            if is_comment_line or (comments and not t):
                comments.append(t)
                continue
            if not t:
                continue

            out = ['\n'.join(comments), t]
            comments = []
            try:
                # a literal backslash-n in the test text stands for a real newline
                t = t.replace(r'\n', '\n')
                result = self.parseString(t, parseAll=parseAll)
                out.append(result.dump(full=fullDump))
                success = success and not failureTests
            except ParseBaseException as pe:
                marker = '^' + ("(FATAL)" if isinstance(pe, ParseFatalException) else "")
                if '\n' not in t:
                    out.append(' ' * pe.loc + marker)
                else:
                    # multi-line test: show the offending line and point at the column
                    out.append(line(pe.loc, t))
                    out.append(' ' * (col(pe.loc, t) - 1) + marker)
                out.append("FAIL: " + str(pe))
                success = success and failureTests
                result = pe
            except Exception as exc:
                out.append("FAIL-EXCEPTION: " + str(exc))
                success = success and failureTests
                result = exc

            if printResults:
                if fullDump:
                    out.append('')
                print('\n'.join(out))

            allResults.append((t, result))

        return success, allResults
2362

2363
        
2364
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
    """
    def __init__(self):
        # initialise the base element with savelist switched off
        super(Token, self).__init__(savelist=False)
2370

2371

2372
class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2381

2382

2383
class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise C{ParseException} at C{loc} with this token's error message."""
        raise ParseException(instring, loc, self.errmsg, self)
2396

2397

2398
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn, then turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        quoted = '"%s"' % _ustr(self.match)
        self.name = quoted
        self.errmsg = "Expected " + quoted
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(end location, match string)} if the literal starts at C{loc}, else raise C{ParseException}."""
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        if self.matchLen == 1 or instring.startswith(self.match, loc):
            return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

# short alias, and registration of Literal as ParserElement's literal-string class
_L = Literal
ParserElement._literalStringClass = Literal
2438

2439
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the keyword at C{loc}, requiring that the characters just before and
        just after it (when present) are not in C{identChars}.  Returns
        C{(end location, match string)} or raises C{ParseException}.
        """
        matchLen = self.matchLen
        end = loc + matchLen
        identChars = self.identChars
        if self.caseless:
            found = (instring[loc:end].upper() == self.caselessmatch and
                     (loc >= len(instring) - matchLen or instring[end].upper() not in identChars) and
                     (loc == 0 or instring[loc - 1].upper() not in identChars))
        else:
            found = (instring[loc] == self.firstMatchChar and
                     (matchLen == 1 or instring.startswith(self.match, loc)) and
                     (loc >= len(instring) - matchLen or instring[end] not in identChars) and
                     (loc == 0 or instring[loc - 1] not in identChars))
        if found:
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy whose C{identChars} is set to the current class-level default."""
        # NOTE(review): this replaces any custom identChars given to the constructor
        # with Keyword.DEFAULT_KEYWORD_CHARS (a string, not a set) - confirm intended
        duplicate = super(Keyword, self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2503

2504
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the base Literal holds the upper-cased text used for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice with the stored match; return the defining literal on success."""
        stop = loc + self.matchLen
        if instring[loc:stop].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return stop, self.returnString
2526

2527
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__(self, matchString, identChars=None):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the keyword at C{loc} ignoring case.

        The keyword must not be immediately followed by, nor immediately
        preceded by, a character in C{identChars}.  The leading check makes this
        agree with C{Keyword(..., caseless=True)}; without it a keyword glued to
        a preceding identifier character would be accepted.

        Returns C{(end location, match string)}; raises C{ParseException} on failure.
        """
        end = loc + self.matchLen
        identChars = self.identChars
        if (instring[loc:end].upper() == self.caselessmatch and
                (loc >= len(instring) - self.matchLen or instring[end].upper() not in identChars) and
                (loc == 0 or instring[loc - 1].upper() not in identChars)):
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)
2544

2545
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """
        Compare the input starting at C{loc} with C{match_string}, character by
        character, tolerating up to C{maxMismatches} differences.

        Returns C{(end location, results)} where the end location is
        C{loc + len(match_string)} and C{results} holds the matched input text
        plus the C{original} and C{mismatches} named results.  Raises
        C{ParseException} if the input is too short or has too many mismatches.
        """
        start = loc
        match_string = self.match_string
        # the end is measured from the starting location, not from 0, so matches
        # that begin part-way through the input report the correct span
        maxloc = start + len(match_string)

        if maxloc <= len(instring):
            maxMismatches = self.maxMismatches
            mismatches = []

            for offset, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(offset)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # loop ran to completion: within the mismatch budget
                results = ParseResults([instring[start:maxloc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return maxloc, results

        raise ParseException(instring, loc, self.errmsg, self)
2604

2605

2606
class Word(Token):
    """
    Token for matching words composed of allowed character sets.
    Defined with string containing all allowed initial characters,
    an optional string containing allowed body characters (if omitted,
    defaults to the initial character set), and an optional minimum,
    maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction. An optional
    C{excludeChars} parameter can list characters that might be found in
    the input C{bodyChars} string; useful to define a word of all printables
    except for one or two characters, for instance.

    L{srange} is useful for defining custom character set strings for defining
    C{Word} expressions, using range notation from regular expression character sets.

    A common mistake is to use C{Word} to match a specific literal string, as in
    C{Word("Address")}. Remember that C{Word} uses the string argument to define
    I{sets} of matchable characters. This expression would match "Add", "AAA",
    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
    To match an exact literal string, use L{Literal} or L{Keyword}.

    pyparsing includes helper strings for building Words:
     - L{alphas}
     - L{nums}
     - L{alphanums}
     - L{hexnums}
     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - L{printables} (any non-whitespace character)

    Example::
        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums+'-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        # a missing or empty body set falls back to the initial set
        if not bodyChars:
            bodyChars = initChars

        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # with default length limits and no space in the character sets, matching
        # is delegated to a compiled regular expression
        default_lengths = (min == 1 and max == 0 and exact == 0)
        if ' ' not in self.initCharsOrig + self.bodyCharsOrig and default_lengths:
            initOrig = self.initCharsOrig
            bodyOrig = self.bodyCharsOrig
            if bodyOrig == initOrig:
                pattern = "[%s]+" % _escapeRegexRangeChars(initOrig)
            elif len(initOrig) == 1:
                pattern = "%s[%s]*" % (re.escape(initOrig),
                                       _escapeRegexRangeChars(bodyOrig))
            else:
                pattern = "[%s][%s]*" % (_escapeRegexRangeChars(initOrig),
                                         _escapeRegexRangeChars(bodyOrig))
            if self.asKeyword:
                pattern = r"\b" + pattern + r"\b"
            self.reString = pattern
            try:
                self.re = re.compile(self.reString)
            except Exception:
                # fall back to the character-by-character scan in parseImpl
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match a word at C{loc}; returns C{(end location, matched text)} or raises
        C{ParseException}.  Uses the compiled regular expression when one was
        built, otherwise scans the characters directly.
        """
        if self.re:
            found = self.re.match(instring, loc)
            if not found:
                raise ParseException(instring, loc, self.errmsg, self)
            return found.end(), found.group()

        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = min(start + self.maxLen, instrlen)
        loc += 1
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        too_short = loc - start < self.minLen
        # with an explicit maximum, a further body character right after the match is a failure
        runs_on = self.maxSpecified and loc < instrlen and instring[loc] in bodychars
        # as a keyword, the word may not touch body characters on either side
        touches_body = self.asKeyword and (
            (start > 0 and instring[start - 1] in bodychars) or
            (loc < instrlen and instring[loc] in bodychars))

        if too_short or runs_on or touches_body:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        """Return the assigned name if there is one, else a C{"W:(...)"} summary of the character sets."""
        try:
            return super(Word, self).__str__()
        except Exception:
            pass

        if self.strRepr is not None:
            return self.strRepr

        def abbreviate(chars):
            # show at most the first four characters
            return chars[:4] + "..." if len(chars) > 4 else chars

        if self.initCharsOrig == self.bodyCharsOrig:
            self.strRepr = "W:(%s)" % abbreviate(self.initCharsOrig)
        else:
            self.strRepr = "W:(%s,%s)" % (abbreviate(self.initCharsOrig), abbreviate(self.bodyCharsOrig))
        return self.strRepr
2763

2764

2765
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__(self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled RE object, which is then used
        directly.  Raises C{ValueError} for any other type, and re-raises the
        C{re} error (after a warning) when a pattern string fails to compile."""
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same exception class as sre_constants.error
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the pattern source text; str(pattern) would give the
            # "re.compile(...)" repr rather than the regular expression itself
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Apply the compiled expression at C{loc}.  Returns C{(end location, results)}
        where C{results} holds the matched text and one named result per named
        group; raises C{ParseException} if there is no match.
        """
        result = self.re.match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        ret = ParseResults(result.group())
        for key, value in result.groupdict().items():
            ret[key] = value
        return result.end(), ret

    def __str__(self):
        """Return the assigned name if there is one, else C{"Re:(<repr of pattern>)"}."""
        try:
            return super(Regex, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2836

2837

2838
class QuotedString(Token):
    r"""
    Token that matches text enclosed between quoting delimiters.

    Constructor parameters:
        - quoteChar - string of one or more characters that opens the quoted string
        - escChar - character used to escape quotes, typically backslash (default=C{None})
        - escQuote - special sequence standing for an embedded quote (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean, whether the quoted text may span multiple lines (default=C{False})
        - unquoteResults - boolean, whether the delimiters are stripped from the returned text (default=C{True})
        - endQuoteChar - string of one or more characters that closes the quoted string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        super(QuotedString, self).__init__()

        # leading/trailing whitespace in a delimiter is discarded
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()
        else:
            # closing delimiter defaults to the opening one
            endQuoteChar = quoteChar

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # pieces shared by the single-line and multiline forms of the pattern
        openQuote = re.escape(quoteChar)
        excludedEnd = _escapeRegexRangeChars(endQuoteChar[0])
        excludedEsc = _escapeRegexRangeChars(escChar) if escChar is not None else ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            pattern = r'%s(?:[^%s%s]' % (openQuote, excludedEnd, excludedEsc)
        else:
            # single-line strings additionally exclude newline and carriage return
            self.flags = 0
            pattern = r'%s(?:[^%s\n\r%s]' % (openQuote, excludedEnd, excludedEsc)

        if len(endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the body,
            # as long as the next character does not continue the end quote
            prefixAlternatives = [
                "%s[^%s]" % (re.escape(endQuoteChar[:i]),
                             _escapeRegexRangeChars(endQuoteChar[i]))
                for i in range(len(endQuoteChar) - 1, 0, -1)
            ]
            pattern += '|(?:' + ')|(?:'.join(prefixAlternatives) + ')'
        if escQuote:
            pattern += r'|(?:%s)' % re.escape(escQuote)
        if escChar:
            pattern += r'|(?:%s.)' % re.escape(escChar)
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        pattern += r')*%s' % re.escape(endQuoteChar)
        self.pattern = pattern

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a quoted string at C{loc}.

        Returns the location past the closing delimiter and the matched text;
        when C{unquoteResults} is set, the delimiters are removed and escape
        sequences are resolved.  Raises C{ParseException} on no match.
        """
        # cheap first-character test before running the regular expression
        match = None
        if instring[loc] == self.firstQuoteChar:
            match = self.re.match(instring, loc)
        if not match:
            raise ParseException(instring, loc, self.errmsg, self)

        text = match.group()

        if self.unquoteResults:
            # drop the opening and closing delimiters
            text = text[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(text, basestring):
                # turn escaped whitespace literals into the real characters
                if '\\' in text and self.convertWhitespaceEscapes:
                    whitespaceEscapes = (
                        (r'\t', '\t'),
                        (r'\n', '\n'),
                        (r'\f', '\f'),
                        (r'\r', '\r'),
                    )
                    for literal, actual in whitespaceEscapes:
                        text = text.replace(literal, actual)

                # strip the escape character from escaped characters
                if self.escChar:
                    text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

                # collapse the escaped-quote sequence to the end quote
                if self.escQuote:
                    text = text.replace(self.escQuote, self.endQuoteChar)

        return match.end(), text

    def __str__( self ):
        """Return the base-class string if available, else a cached description of the delimiters."""
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            pass

        description = self.strRepr
        if description is None:
            description = self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
        return description
2973

2974

2975
class CharsNotIn(Token):
    """
    Token matching a run of characters that are I{not} in a given set (whitespace
    is part of the match unless it is listed in the exclusion set - see example).
    Constructed from a string of all disallowed characters plus an optional
    minimum, maximum, and/or exact length.  C{min} defaults to 1 (a minimum
    value < 1 is rejected); C{max} and C{exact} default to 0, meaning no
    maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume characters from C{loc} until an excluded character, the maximum
        length, or the end of the input is reached.

        Returns the end location and the matched substring; raises
        C{ParseException} if the first character is excluded or fewer than
        C{minLen} characters were matched.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min( begin + self.maxLen, len(instring) )
        loc = begin + 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def __str__( self ):
        """Return the base-class string if available, else a cached summary of the excluded characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            # only the first four excluded characters are shown for long sets
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
3045

3046
class White(Token):
    """
    Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # display names used to build the expression name from the matched characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as leading whitespace
        self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
        # characters without an entry in whiteStrs (e.g. "\v") fall back to their
        # repr() instead of raising KeyError while the name is being built
        self.name = "".join(White.whiteStrs.get(c, repr(c)) for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a run of the configured whitespace characters starting at C{loc}.

        Returns the end location and the matched whitespace; raises
        C{ParseException} if the first character is not in C{matchWhite} or
        fewer than C{minLen} characters were matched.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        # never scan past the maximum length or the end of the input
        maxloc = min( start + self.maxLen, len(instring) )
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
3095

3096

3097
class _PositionToken(Token):
    """
    Common base for the position-matching tokens defined below it
    (C{GoToColumn}, C{LineStart}, C{LineEnd}, C{StringStart}, C{StringEnd},
    C{WordStart}, C{WordEnd}).  The expression name defaults to the class name.
    """
    def __init__( self ):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
3103

3104
class GoToColumn(_PositionToken):
    """
    Token that advances to a given column of the input text; useful when scraping tabular reports.
    """
    def __init__( self, colno ):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """
        Return the location to parse from: unchanged if already at the target
        column, otherwise past any ignorable expressions and then past
        whitespace until the target column, a non-space character, or the end
        of the input is reached.
        """
        if col(loc, instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the location of the target column and the text skipped to get
        there; raises C{ParseException} if C{loc} is already past that column.
        """
        currentCol = col( loc, instring )
        if currentCol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - currentCol
        return target, instring[ loc: target ]
3128

3129

3130
class LineStart(_PositionToken):
    r"""
    Matches when the current position is at the start of a line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__( self ):
        super(LineStart, self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when C{loc} is in column 1; otherwise raise C{ParseException}."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3159

3160
class LineEnd(_PositionToken):
    """
    Matches when the current position is at the end of a line within the parse string
    """
    def __init__( self ):
        super(LineEnd, self).__init__()
        # newline must not be skipped as whitespace, or it could never be matched
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a newline at C{loc} (returned as the token C{"\\n"}) or the end of
        the input (no tokens); anything else raises C{ParseException}.
        """
        inputLen = len(instring)
        if loc == inputLen:
            # end of input counts as end of line
            return loc + 1, []
        if loc < inputLen and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3179

3180
class StringStart(_PositionToken):
    """
    Matches when the current position is at the start of the parse string
    """
    def __init__( self ):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with no tokens at location 0, or at the location that
        C{preParse} reaches when started from 0 (i.e. everything before C{loc}
        was skippable); otherwise raise C{ParseException}.
        """
        # a non-zero loc is acceptable only if pre-parsing from 0 lands exactly on it
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3194

3195
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with no tokens when C{loc} is at or beyond the end of
        C{instring}; raise C{ParseException} when input remains.

        At exactly the end of the input the returned location is C{loc+1};
        beyond the end, C{loc} is returned unchanged.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc+1, []
        # loc > len(instring): the three comparisons are exhaustive, so the
        # former trailing "else: raise" branch could never run and was dropped
        return loc, []
3212

3213
class WordStart(_PositionToken):
    r"""
    Matches when the current position is at the start of a Word, i.e. it is
    not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with no tokens at location 0, or when the previous character is
        not a word character and the current one is; otherwise raise
        C{ParseException}.
        """
        if loc != 0:
            precededByWordChar = instring[loc-1] in self.wordChars
            if precededByWordChar or instring[loc] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3232

3233
class WordEnd(_PositionToken):
    r"""
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with no tokens when C{loc} is at or past the end of the input,
        or when the previous character is a word character and the current one
        is not; otherwise raise C{ParseException}.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # loc == 0 has no preceding character, so no word can end there.
            # Test it explicitly: instring[loc-1] would otherwise index -1 and
            # silently inspect the LAST character of the input.
            if (instring[loc] in self.wordChars or
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3254

3255

3256
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
    """
    def __init__( self, exprs, savelist = False ):
        """
        Store the contained expressions in C{self.exprs}.

        C{exprs} may be a generator, a single string, an iterable of
        expressions, or a single non-iterable object.  A single string, or an
        iterable consisting only of strings, is converted with
        C{ParserElement._literalStringClass}.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Iterable ):
            items = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(item, basestring) for item in items):
                items = [ ParserElement._literalStringClass(item) for item in items ]
            self.exprs = items
        else:
            # not registered as Iterable: try to expand it anyway, and treat
            # it as a single expression if that is not possible
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression(s) at index or slice C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, drop the cached string form, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so that the originals are left untouched
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression on this expression and on
        every contained expression; a C{Suppress} that is already registered
        is not added again.  Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        # propagate the entry that the base class just appended
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class string if available, else a cached C{ClassName:(exprs)} form."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """
        Streamline this expression and all contained expressions, flatten
        mergeable nested expressions of the same class, refresh C{errmsg},
        and return C{self}.
        """
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        def mergeable( candidate ):
            # nested expression of our own class with no parse action,
            # results name, or debug flag that flattening would lose
            return ( isinstance( candidate, self.__class__ ) and
                     not(candidate.parseAction) and
                     candidate.resultsName is None and
                     not candidate.debug )

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if mergeable( other ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if mergeable( other ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class C{setResultsName} and return its result."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=None ):
        """
        Validate every contained expression, passing along the trace extended
        with this expression, then run the recursion check on C{self}.

        C{validateTrace} defaults to an empty trace; C{None} replaces the
        former mutable C{[]} default and is treated identically.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are copied as well."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3367

3368
class And(ParseExpression):
    """
    Requires every given C{ParseExpression} to be found, in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}; once C{And.parseImpl} passes it, later failures are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling is taken from the first contained expression
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse each contained expression in sequence from C{loc} and return the
        final location together with the accumulated results.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        pastErrorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                pastErrorStop = True
                continue

            if not pastErrorStop:
                loc, exprtokens = e._parse( instring, loc, doActions )
            else:
                # after an error stop, failures are converted to ParseSyntaxException
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} in place (strings are converted with C{_literalStringClass}) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on contained expressions, stopping after the first that cannot match empty."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if present, else a cached C{{a b c}} form."""
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
        return text
3442

3443

3444
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    When more than one expression matches, the one matching the longest string is used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is flagged as possibly empty
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Try every alternative at C{loc}, then parse with the successful ones
        from longest to shortest trial match and return the first result.

        If nothing succeeds, the exception that got furthest into the input is
        re-raised with this expression's error message.
        """
        furthestLoc = -1
        furthestExc = None
        candidates = []
        for e in self.exprs:
            try:
                endLoc = e.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),e.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, e))

        if candidates:
            candidates.sort(key=lambda pair: -pair[0])
            for _, e in candidates:
                try:
                    return e._parse( instring, loc, doActions )
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestExc = err
                        furthestLoc = err.loc

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ixor__(self, other ):
        """Append C{other} in place (strings are converted with C{_literalStringClass}) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if present, else a cached C{{a ^ b ^ c}} form."""
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
        return text

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
3521

3522

3523
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    When more than one expression matches, the first one listed wins.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is flagged as possibly empty
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the result of the first contained expression that parses at C{loc}.

        If none matches, the exception that got furthest into the input is
        re-raised with this expression's error message.
        """
        furthestLoc = -1
        furthestExc = None
        for e in self.exprs:
            try:
                return e._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),e.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ior__(self, other ):
        """Append C{other} in place (strings are converted with C{_literalStringClass}) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if present, else a cached C{{a | b | c}} form."""
        if hasattr(self, "name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
        return text

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
3589

3590

3591
class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order 
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # the whole set may match empty only if every member may
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # defer classification of the sub-expressions until the first parse
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the sub-expressions in whatever order they occur in the input.

        A first pass uses C{tryParse} only to discover the order in which the
        expressions match; a second pass then parses them in that order to
        collect the results.  Raises C{ParseException} if a required
        expression never matched.
        """
        if self.initExprGroups:
            # one-time classification of the sub-expressions
            wrappedOptionals = [ e for e in self.exprs if isinstance(e,Optional) ]
            self.opt1map = dict((id(opt.expr),opt) for opt in wrappedOptionals)
            emptyCapable = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
            self.optionals = [ opt.expr for opt in wrappedOptionals ] + emptyCapable
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            plainRequired = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required = plainRequired + self.multirequired
            self.initExprGroups = False

        scanLoc = loc
        pendingReqd = self.required[:]
        pendingOpt  = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a full sweep matches nothing
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            numFailed = 0
            for e in candidates:
                try:
                    scanLoc = e.tryParse( instring, scanLoc )
                except ParseException:
                    numFailed += 1
                    continue
                # record the Optional wrapper (if any) rather than its inner expression
                matchOrder.append(self.opt1map.get(id(e),e))
                if e in pendingReqd:
                    pendingReqd.remove(e)
                elif e in pendingOpt:
                    pendingOpt.remove(e)
            if numFailed == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join(_ustr(e) for e in pendingReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt]

        # second pass: parse for real, in the discovered order
        collected = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            collected.append(results)

        return loc, sum(collected, ParseResults([]))

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            members = " & ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + members + "}"
        return self.strRepr

    def checkRecursion( self, parseElementList ):
        trace = parseElementList[:]
        trace.append( self )
        for e in self.exprs:
            e.checkRecursion( trace )
3713

3714

3715
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as C{self.expr}.  C{expr} may be
    C{None}; every method here tolerates that case.
    """
    def __init__( self, expr, savelist=False ):
        """
        Parameters:
         - expr - the contained expression; a string is converted to a parser
              element via C{ParserElement._literalStringClass}; may be C{None}
         - savelist - passed through to C{ParserElement.__init__}
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the contained expression's parsing attributes on the wrapper
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the contained expression; raise C{ParseException} if none is set."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        # report against the real input so the exception carries usable line/column context
        raise ParseException(instring,loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """
        Disable whitespace skipping on this element and on a private copy of the
        contained expression.  Returns C{self}.
        """
        self.skipWhitespace = False
        # guard before copying: expr may legitimately be None
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression on this element and forward
        the newly registered entry to the contained expression.  A C{Suppress}
        that is already registered is not added again.  Returns C{self}.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the contained expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element is already in the trace; otherwise recurse into the contained expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the contained expression, then run the recursion check from this element."""
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Use the base-class string form when it works; otherwise fall back to a cached C{"ClassName:(expr)"} rendering."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
3790

3791

3792
class FollowedBy(ParseElementEnhance):
    """
    Lookahead matching of the given parse expression.  C{FollowedBy}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        
        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        # a lookahead consumes nothing, so it can always "return empty"
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # tryParse raises if the lookahead fails; its returned location is discarded
        self.expr.tryParse( instring, loc )
        noTokens = []
        return loc, noTokens
3816

3817

3818
class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression does I{not} match at the current
    position.  Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator.

    Example::
        # accept a word only if it does not begin with "end"
        not_end_word = ~Literal("end") + Word(alphas)
    """
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        if not self.expr.canParseNext(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "~{" + inner + "}"
        return self.strRepr
3849

3850
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation of repeated matching of one expression, with an
    optional C{stopOn} sentinel at which repetition stops.
    """
    def __init__( self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # stored negated, so that "not at the sentinel" is a single tryParse call
        if sentinel is not None:
            self.not_ender = ~sentinel
        else:
            self.not_ender = None

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the contained expression once (a failure here propagates to the
        caller), then keep matching until it fails or the sentinel is reached.
        """
        parseExpr = self.expr._parse
        skipIgnorables = self._skipIgnorables
        checkEnder = self.not_ender is not None
        if checkEnder:
            rejectEnder = self.not_ender.tryParse
            # must be at least one (but first see if we are the stopOn sentinel;
            # if so, fail)
            rejectEnder(instring, loc)
        loc, tokens = parseExpr( instring, loc, doActions, callPreParse=False )
        try:
            hasIgnoreExprs = bool(self.ignoreExprs)
            while True:
                if checkEnder:
                    rejectEnder(instring, loc)
                preloc = skipIgnorables( instring, loc ) if hasIgnoreExprs else loc
                loc, moreTokens = parseExpr( instring, preloc, doActions )
                if moreTokens or moreTokens.haskeys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            # first failed repetition (or sentinel hit) ends the loop
            pass

        return loc, tokens
3887
        
3888
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.
    
    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition 
          expression)          

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
        
        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "{" + inner + "}..."
        return self.strRepr
3922

3923
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.
    
    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition 
          expression)          

    Example: similar to L{OneOrMore}
    """
    def __init__( self, expr, stopOn=None):
        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True
        
    def parseImpl( self, instring, loc, doActions=True ):
        # a failure of the mandatory first match simply means "zero repetitions"
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException,IndexError):
            outcome = loc, []
        return outcome

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "[" + inner + "]..."
        return self.strRepr
3953

3954
class _NullToken(object):
3955
    def __bool__(self):
3956
        return False
3957
    __nonzero__ = __bool__
3958
    def __str__(self):
3959
        return ""
3960

3961
_optionalNotMatched = _NullToken()
3962
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match zero or one times
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345
            
            # ZIP+4 form
            12101-0001
            
            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional,self).__init__( expr, savelist=False )
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse the contained expression if it matches; otherwise succeed without
        advancing, returning the default value (if one was given) or no tokens.
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the contained expression's results name too
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "[" + inner + "]"
        return self.strRepr
4025

4026
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed 
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and 
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be 
          included in the skipped text; if found before the target expression is found, 
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP 
                      + string_data("sev") + SEP 
                      + string_data("desc") + SEP 
                      + integer("days_open"))
        
        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string for failOn is promoted to a parser element
        if isinstance(failOn, basestring):
            failOn = ParserElement._literalStringClass(failOn)
        self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward one character at a time from C{loc} until the target
        expression matches, and return the text skipped over (plus the parsed
        target when C{include} was requested).  Raises C{ParseException} if the
        end of the input is passed without a match.
        """
        startloc = loc
        instrlen = len(instring)
        parseTarget = self.expr._parse
        failOnMatches = None
        if self.failOn is not None:
            failOnMatches = self.failOn.canParseNext
        skipIgnored = None
        if self.ignoreExpr is not None:
            skipIgnored = self.ignoreExpr.tryParse

        scanLoc = loc
        while scanLoc <= instrlen:
            # NOTE(review): a failOn match leaves the loop via break, so the
            # while-else raise below is skipped and the text up to this point
            # is returned - confirm this is the intended "not a match" behavior
            if failOnMatches is not None and failOnMatches(instring, scanLoc):
                break

            if skipIgnored is not None:
                # advance past ignore expressions
                while True:
                    try:
                        scanLoc = skipIgnored(instring, scanLoc)
                    except ParseBaseException:
                        break

            try:
                parseTarget(instring, scanLoc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                scanLoc += 1
            else:
                # matched skipto expr, done
                break
        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = scanLoc
        skipresult = ParseResults(instring[startloc:loc])

        if self.includeMatch:
            loc, targetTokens = parseTarget(instring,loc,doActions,callPreParse=False)
            skipresult += targetTokens

        return loc, skipresult
4140

4141
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Set the contained expression to C{other} (a string is first wrapped in
        the literal-string class) and copy its parsing attributes onto this
        element.  Returns C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self
        
    def __ilshift__(self, other):
        """Implement C{<<=} as C{<<}."""
        return self << other
    
    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the contained expression is left untouched.  Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression at most once; the C{streamlined} flag is set first, before descending.  Returns C{self}."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression unless this element is already in the trace, then run the recursion check."""
        # the default list is only ever copied, never mutated
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name if there is one, else C{"<ClassName>: ..."}."""
        if hasattr(self,"name"):
            return self.name
        # the contained expression is deliberately not rendered here
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Copy this element.  When no expression has been assigned yet, return a
        new C{Forward} that wraps this one instead of copying it.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
4221

4222
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the fixed placeholder C{"..."}."""
    def __str__( self ):
        placeholder = "..."
        return placeholder
4225

4226
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but not forwarded; saveAsList is forced off below
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
4233

4234
class Combine(TokenConverter):
    """
    Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        # in adjacent mode the ignorable is registered on this element only,
        # via the base ParserElement implementation
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with one string built from them, on an emptied copy of C{tokenlist}."""
        retToks = tokenlist.copy()
        del retToks[:]
        combined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([ combined ], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [ retToks ]
        return retToks
4277

4278
class Group(TokenConverter):
    """
    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # wrap the tokens in a one-element list so they stay nested as a unit
        grouped = [ tokenlist ]
        return grouped
4298

4299
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        
        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())
        
        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())
        
        # access named fields as dict entries, or output as dict
        print(result['shape'])        
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__( self, expr ):
        super(Dict,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """
        Key each non-empty entry of C{tokenlist} by its first token.  The stored
        value is C{""} for a one-token entry, the second token for a plain
        two-token entry, and otherwise the remaining tokens (unwrapped when
        exactly one unnamed token remains).
        """
        for position, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue
            key = entry[0]
            if isinstance(key,int):
                key = _ustr(entry[0]).strip()

            if len(entry)==1:
                value = ""
            elif len(entry)==2 and not isinstance(entry[1],ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                keepAsGroup = len(remainder)!= 1 or (isinstance(remainder,ParseResults) and remainder.haskeys())
                value = remainder if keepAsGroup else remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value,position)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
4362

4363

4364
class Suppress(TokenConverter):
    """
    Converter that discards the tokens produced by the wrapped expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # delimiters are needed while parsing but usually unwanted in the
        # output - wrap them in Suppress to leave them out of the results
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse(self, instring, loc, tokenlist):
        # whatever was matched, report no tokens
        return list()

    def suppress(self):
        # already a suppressing expression - hand back the same object
        return self
4388

4389

4390
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.

    The wrapped callable runs on the first call; every later call raises
    L{ParseException} until L{reset} is invoked.
    """
    def __init__(self, methodCall):
        # normalize the callable so it can always be invoked as (s, l, t)
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """
        Run the wrapped parse action with C{(s, l, t)} and return its result.

        Raises L{ParseException} if the action has already completed once and
        L{reset} has not been called since.
        """
        if not self.called:
            results = self.callable(s, l, t)
            # only marked as used after the action returned without raising
            self.called = True
            return results
        # say why the match is rejected instead of raising with an empty message
        raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")

    def reset(self):
        """Allow the wrapped parse action to be called one more time."""
        self.called = False
4405

4406
def traceParseAction(f):
    """
    Decorator that traces calls to a parse action on C{sys.stderr}.

    On entry it writes C{">>entering I{method-name}(line: I{current_source_line}, I{parse_location}, I{matched_tokens})"}.
    On exit it writes C{"<<leaving"} followed by the returned value, or by the exception
    raised by the parse action (which is then re-raised).

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is present: prefix the name with its class
            label = "%s.%s" % (paArgs[0].__class__.__name__, label)
        sys.stderr.write(">>entering %s(line: '%s', %d, %r)\n" % (label, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        else:
            sys.stderr.write("<<leaving %s (ret: %r)\n" % (label, ret))
            return ret

    # present the tracer under the traced function's name when that is possible
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
4446

4447
#
4448
# global helpers
4449
#
4450
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper that builds an expression for one or more C{expr} separated by C{delim}
    (a comma unless specified otherwise).

    With C{combine=False} (the default) the delimiters are suppressed and the matched
    elements are returned as separate tokens.  With C{combine=True} the whole list,
    delimiters included, is wrapped in C{L{Combine}} and returned as a single token string.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    exprText = _ustr(expr)
    dlName = "%s [%s %s]..." % (exprText, _ustr(delim), exprText)
    if combine:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    else:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    return listExpr.setName(dlName)
4468

4469
def countedArray( expr, intExpr=None ):
    """
    Helper for a length-prefixed list of expressions, i.e. input of the form::
        integer expr expr expr...
    where the leading integer says how many C{expr} items follow.
    The count token itself is dropped; the matched items come back as one grouped list.

    C{intExpr}, when given, is copied and used to parse the count; it should produce
    an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # once the count is known, define what the array part has to match
        count = t[0]
        if count:
            body = Group(And([expr] * count))
        else:
            body = Group(empty)
        arrayExpr << body
        return []

    if intExpr is None:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    else:
        # work on a copy so the caller's expression does not receive our parse action
        intExpr = intExpr.copy()
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)

    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')
4499

4500
def _flatten(L):
4501
    ret = []
4502
    for i in L:
4503
        if isinstance(i,list):
4504
            ret.extend(_flatten(i))
4505
        else:
4506
            ret.append(i)
4507
    return ret
4508

4509
def matchPreviousLiteral(expr):
    """
    Helper that returns an expression matching a literal 'repeat' of whatever
    C{expr} matched earlier in the same parse.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Since the repeat is matched as a
    literal, it also matches the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from the tokens that were just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: require each one in sequence
            rep << And(Literal(tt) for tt in _flatten(t.asList()))

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4536

4537
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of expr; a parse action installed below
    # then compares its tokens with the ones expr produced
    e2 = expr.copy()
    rep <<= e2
    def copyTokenToRepeater(s,l,t):
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                # report the real input, location and reason rather than an
                # empty exception pinned to location 0
                raise ParseException(s, l, "Expected %r, found %r" % (matchTokens, theseTokens))
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4564

4565
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters \\ ^ - ] in C{s} and spell newline/tab as C{\\n}/C{\\t}."""
    # the backslash goes first so the escapes added for the other characters are not doubled
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    for raw, spelled in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, spelled)
    return _ustr(s)
4572

4573
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Returns C{L{NoMatch}} when C{strs} yields no symbols; an argument that is neither a
    string nor an iterable also emits a C{SyntaxWarning}.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates and make sure no symbol is hidden behind an earlier symbol
    # that is a prefix of it.  After any change the same index is scanned again.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if isequal(other, cur):
                # later duplicate of cur: drop it
                del symbols[i+j+1]
                break
            elif masks(cur, other):
                # cur is a prefix of other: move other in front of cur
                del symbols[i+j+1]
                symbols.insert(i,other)
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            # A character class is only correct when every symbol is exactly one
            # character.  Comparing the symbol count with the total length is also
            # true for mixes such as ["", "ab"], which would yield the wrong "[ab]".
            if all(len(sym) == 1 for sym in symbols):
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4645

4646
def dictOf( key, value ):
    """
    Helper that builds a dictionary-style expression from a key pattern and a value
    pattern, nesting the C{L{Dict}}, C{L{ZeroOrMore}} and C{L{Group}} wrappers in the
    right order.  The key pattern may contain delimiters or punctuation as long as they
    are suppressed, so that only the significant key text remains.  The value pattern may
    carry results names of its own, which then show up as named fields in the C{Dict} results.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())
        
        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one grouped key/value pair per dictionary entry
    entry = Group(key + value)
    return Dict(ZeroOrMore(entry))
4680

4681
def originalTextFor(expr, asString=True):
    """
    Helper that makes an expression return the original, untokenized input text it
    matched.  Handy for turning the parsed fields of an HTML start tag back into the raw
    tag text, or for getting tokens separated by whitespace back as the matching slice
    of the input.  By default the result is a string containing the original text.

    If C{asString} is passed as C{False}, the result is instead a C{L{ParseResults}} whose
    single token is the original matched text and which keeps any results names that were
    matched inside C{expr}.  So when C{expr} defines results names that you want to
    preserve, call C{L{originalTextFor}} with C{asString=False}.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers that record the parse location before and after expr
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s, l, t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            # swap the token list for the raw text, removing the two marker names
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
4717

4718
def ungroup(expr):
    """
    Helper to undo grouping: wraps C{expr} in a C{TokenConverter} whose parse
    action returns only the first matched token.
    """
    converter = TokenConverter(expr)
    return converter.setParseAction(lambda t: t[0])
4724

4725
def locatedExpr(expr):
    """
    Helper that wraps an expression so its result also carries the start and end
    locations of the match in the input string.
    The grouped result has these results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose only token is the current parse location
    locator = Empty().setParseAction(lambda s,l,t: l)
    startLoc = locator("locn_start")
    value = expr("value")
    # the end marker leaves whitespace alone, so trailing blanks are not counted
    endLoc = locator.copy().leaveWhitespace()("locn_end")
    return Group(startLoc + value + endLoc)
4747

4748

4749
# ready-made, named instances of the positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
4755

4756
# grammar for the '[...]' range strings accepted by srange()

def _hexEscapeToChar(s, l, t):
    # Convert a matched '\x##', '\X##', '\0x##' or '\0X##' escape to its character.
    # The digits are everything after the x/X marker.  str.lstrip(r'\0x') treats its
    # argument as a character set, so it also removed leading zero digits of the value
    # (turning '\x00' into '') and never removed an upper-case 'X'.
    escape = t[0]
    return unichr(int(escape[escape.lower().index("x") + 1:], 16))

# a backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(_hexEscapeToChar)
# '\0' followed by octal digits -> the character with that code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# 'a-z' style range, grouped as [first, last]
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
4762

4763
def srange(s):
    r"""
    Helper that expands a regexp-style '[]' range string into the string of all
    characters it denotes, for use in Word construction::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s; the result is the expanded character
    set joined into a single string.  An empty string is returned if the input
    cannot be processed.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) 
         (C{\0x##} is also supported for backwards compatibility) 
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    """
    def _expanded(part):
        # single characters are used as they are; a parsed range is expanded
        if not isinstance(part, ParseResults):
            return part
        first, last = ord(part[0]), ord(part[1])
        return ''.join(unichr(c) for c in range(first, last + 1))

    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        return ""
4786

4787
def matchOnlyAtCol(n):
    """
    Helper that returns a parse action which accepts a match only when it
    starts at column C{n} of the input text; otherwise the parse action raises
    L{ParseException}.
    """
    def verifyCol(strg, locn, toks):
        if col(locn, strg) == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)
    return verifyCol
4796

4797
def replaceWith(replStr):
    """
    Helper that returns a parse action which ignores the matched tokens and yields
    the fixed value C{replStr} instead.  Especially useful together with
    C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num
        
        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    # the replacement is wrapped in a list so it is always returned as one token
    return lambda s, l, t: [replStr]
4810

4811
def removeQuotes(s,l,t):
    """
    Parse action that strips the first and last character (the quotation marks)
    from the first matched token.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
4824

4825
def tokenMap(func, *args):
    """
    Helper that builds a parse action applying C{func} to every element of a
    ParseResults list.  Any additional C{args} are passed to C{func} after the token, as
    in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which converts
    the parsed data to an integer using base 16.

    Example (compare the last to example in L{ParserElement.transformString}::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')
        
        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s, l, t):
        mapped = []
        for token in t:
            mapped.append(func(token, *args))
        return mapped

    # name the parse action after the mapped function, falling back to the
    # function's class name and finally to its string form
    try:
        fallback = func.__class__.__name__
        func_name = getattr(func, '__name__', fallback)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
4868

4869
upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
"""(Deprecated) Parse action that converts every token to upper case. Use L{pyparsing_common.upcaseTokens} instead."""

downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
"""(Deprecated) Parse action that converts every token to lower case. Use L{pyparsing_common.downcaseTokens} instead."""
4874
    
4875
def _makeTags(tagStr, xml):
    """
    Internal helper that builds the opening and closing tag expressions for a tag.

    C{tagStr} is either a tag name or an expression with a C{name} attribute; a name is
    turned into a C{Keyword} that is caseless unless C{xml} is true.  Returns the pair
    C{(openTag, closeTag)}.
    """
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: every attribute is name="value" with a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attribute = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        # HTML: names are lower-cased, the value is optional and may be unquoted
        unquotedChars = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(unquotedChars)
        attribute = Group(tagAttrName.setParseAction(downcaseTokens) +
                          Optional(Suppress("=") + tagAttrValue))

    # "empty" is True when the tag ends in "/>" and False otherwise
    selfClosing = Optional("/", default=[False]).setResultsName("empty").setParseAction(
        lambda s,l,t: t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attribute)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" plus the title-cased tag name without colons
    suffix = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + suffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + suffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
4903

4904
def makeHTMLTags(tagStr):
    """
    Helper that builds the opening and closing tag expressions for an HTML tag name.
    The tags match in upper or lower case; attributes may have namespaces and quoted
    or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end
        
        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags(tagStr, xml=False)
4922

4923
def makeXMLTags(tagStr):
    """
    Helper that builds the opening and closing tag expressions for an XML tag name.
    The tags match only in the upper/lower case given.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags(tagStr, xml=True)
4931

4932
def withAttribute(*args,**attrDict):
    """
    Helper that creates a validating parse action for start tags built with
    C{L{makeXMLTags}} or C{L{makeHTMLTags}}.  Use C{withAttribute} to accept a start tag
    only when it carries the required attribute values, to avoid false matches on common
    tags such as C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.
    When positional name-value tuples are given, keyword arguments are ignored.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>
                
        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)
        
        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    required = args if args else attrDict.items()
    attrs = [(name, value) for name, value in required]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            # ANY_VALUE only asks for the attribute to be present
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa
# sentinel meaning "the attribute must exist, whatever its value"
withAttribute.ANY_VALUE = object()
4996

4997
def withClass(classname, namespace=''):
    """
    Shortcut for C{L{withAttribute}} when filtering on a tag's C{class} attribute,
    which is awkward to spell directly because C{class} is a reserved word in Python.
    With a C{namespace}, the attribute checked is C{"<namespace>:class"}.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>
                
        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))
        
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)
        
        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr: classname})
5031

5032
# holder for the two operator-associativity markers, each a unique sentinel object
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
5035

5036
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the nested
       expression grammar
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 2, opExpr may be None, in which case adjacent
          operands are matched with no operator between them;
          if numTerms is 3, opExpr is a tuple or list of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if numTerms is not 1, 2, or 3, if a ternary opExpr is
    not a pair of expressions, or if rightLeftAssoc is neither C{opAssoc.LEFT}
    nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    # the innermost level is either a bare operand or a parenthesized full expression
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the optional parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # validate before formatting the name, so that a malformed opExpr is
            # reported with this ValueError instead of a TypeError from '%'
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            # format from the unpacked pair so that a list works as well as a tuple
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            # 1-tuple wrapper keeps '%' from trying to unpack opExpr itself
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this level matches its own operator form, or falls through to the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
5150

5151
# Each regex matches the opening quote plus the string body: any character other
# than the quote, newline, carriage return or backslash; a doubled quote; or a
# backslash escape (backslash-x followed by hex digits, or backslash plus any other
# character).  The closing quote is matched by a separate literal.
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).setName("string enclosed in double quotes")
sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("string enclosed in single quotes")
quotedString = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"')
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'")
).setName("quotedString using single or double quotes")
# a quoted string carrying a leading 'u' prefix
unicodeString = Combine(
    _L('u') + quotedString.copy()
).setName("unicode string literal")
5156

5157
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    When no C{content} expression is supplied, one is built that captures all
    whitespace-delimited content between the delimiters as a list of separate
    values; this is only possible when both delimiters are given as strings,
    otherwise C{ValueError} is raised.  C{ValueError} is also raised when
    C{opener} and C{closer} are equal.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from plain string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1
        if singleCharDelims and ignoreExpr is not None:
            # one character at a time, so the ignore expression is re-tested at each position
            content = Combine(
                OneOrMore(~ignoreExpr + CharsNotIn(opener + closer + whiteChars, exact=1))
            ).setParseAction(lambda t: t[0].strip())
        elif singleCharDelims:
            # nothing to ignore: take a whole run of non-delimiter, non-whitespace characters
            content = (empty.copy()
                       + CharsNotIn(opener + closer + whiteChars).setParseAction(lambda t: t[0].strip()))
        elif ignoreExpr is not None:
            # multi-character delimiters must be excluded with lookaheads rather than a character set
            content = Combine(
                OneOrMore(~ignoreExpr + ~Literal(opener) + ~Literal(closer)
                          + CharsNotIn(whiteChars, exact=1))
            ).setParseAction(lambda t: t[0].strip())
        else:
            content = Combine(
                OneOrMore(~Literal(opener) + ~Literal(closer)
                          + CharsNotIn(whiteChars, exact=1))
            ).setParseAction(lambda t: t[0].strip())

    ret = Forward()
    # items inside the delimiters: ignorable text (if any), a nested list, or plain content
    if ignoreExpr is None:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    else:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5246

5247
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{blockStatementExpr} is modified in place: backslash-newline
    line continuations are added to its ignore expressions.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # statement must start in exactly the column of the current block
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the column must be deeper than the current level,
        # and becomes the new top of the indentation stack
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # leaving a block: the column must be back at or left of the enclosing level
        if l >= len(s): return
        curCol = col(l,s)
        # indentStack[-2] is only defined when an enclosing level exists; with
        # fewer than two entries there is nothing to unindent to, so report a
        # parse failure rather than letting an IndexError escape
        if not(len(indentStack) > 1 and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # line ends are matched with only tab and space treated as skippable whitespace
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # allow statements to be continued across lines with a trailing backslash
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
5360

5361
# character sets built from the code ranges 0xc0-0xd6, 0xd8-0xf6, 0xf8-0xff
# (alphas8bit) and 0xa1-0xbf, 0xd7, 0xf7 (punc8bit), as written in the range specs
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions for any tag name made of letters, digits, '_' and ':'
anyOpenTag,anyCloseTag = makeHTMLTags(
    Word(alphas, alphanums+"_:").setName('any tag')
)

# entity name -> replacement character, paired up positionally
_htmlEntityMap = dict(zip(
    "gt lt amp nbsp quot apos".split(),
    '><& "\''
))
# matches '&<name>;' for the names above, exposing the name as the 'entity' group
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) + ");"
).setName("common HTML entity")
5367
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # unknown entity names map to None
    entityName = t.entity
    return _htmlEntityMap.get(entityName)
5370

5371
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
5372
# the regex consumes '/*' and the body up to (not including) the first '*/';
# the terminator is matched as a separate literal
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# a backslash immediately before the newline continues the comment onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/')
    | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one list item: runs of printable non-comma characters, with embedded blanks/tabs
# kept only when they are not followed by a comma or the end of the line
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',')
        + Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")
# an empty item (two adjacent commas) yields the default ""
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
5397

5398
# some other useful expressions - using lower-case class name since we are really using this as a namespace
5399
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = (Word(hexnums)
                   .setName("hex integer")
                   .setParseAction(tokenMap(int,16)))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = (Regex(r'[+-]?\d+')
                      .setName("signed integer")
                      .setParseAction(convertToInteger))
    """expression that parses an integer with optional leading sign, returns an int"""

    # numerator and denominator are separate copies of signed_integer, each
    # re-targeted to produce floats
    fraction = (
        signed_integer().setParseAction(convertToFloat)
        + '/'
        + signed_integer().setParseAction(convertToFloat)
    ).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # first token is the numerator, last token is the denominator
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (
        fraction
        | signed_integer + Optional(Optional('-').suppress() + fraction)
    ).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # whole part and fractional part (when both are present) are added together
    mixed_integer.addParseAction(sum)

    real = (Regex(r'[+-]?\d+\.\d*')
            .setName("real number")
            .setParseAction(convertToFloat))
    """expression that parses a floating point number and returns a float"""

    sci_real = (Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)')
                .setName("real number with scientific notation")
                .setParseAction(convertToFloat))
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = (Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?')
               .setName("fnumber")
               .setParseAction(convertToFloat))
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}'
    ).setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    # full form: exactly eight colon-separated parts
    _full_ipv6_address = (
        _ipv6_part + (':' + _ipv6_part)*7
    ).setName("full IPv6 address")
    # short form: optional parts on either side of a '::'
    _short_ipv6_address = (
        Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
        + "::"
        + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
    ).setName("short IPv6 address")
    # reject short forms in which eight or more tokens are hex parts
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")
    ).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the delimiter captured after the first octet must be repeated between all others
    mac_address = Regex(
        r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}'
    ).setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        A string that does not fit C{fmt} makes the parse action raise
        C{ParseException} carrying the C{strptime} error message.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(instring, loc, toks):
            try:
                parsed = datetime.strptime(toks[0], fmt)
            except ValueError as err:
                raise ParseException(instring, loc, str(err))
            return parsed.date()
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        A string that does not fit C{fmt} makes the parse action raise
        C{ParseException} carrying the C{strptime} error message.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(instring, loc, toks):
            try:
                parsed = datetime.strptime(toks[0], fmt)
            except ValueError as err:
                raise ParseException(instring, loc, str(err))
            return parsed
        return cvt_fn

    iso8601_date = Regex(
        r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?'
    ).setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(
        r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?'
    ).setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(
        r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}'
    ).setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # suppressing both tag forms lets transformString drop them from the text
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        # only the first token is transformed
        firstToken = tokens[0]
        return pyparsing_common._html_stripper.transformString(firstToken)

    # one list item: printable non-comma words, each optionally followed by blanks/tabs,
    # never starting at a comma or at the end of the line
    _commasepitem = Combine(
        OneOrMore(
            ~Literal(",") + ~LineEnd()
            + Word(printables, excludeChars=',')
            + Optional( White(" \t") )
        )
    ).streamline().setName("commaItem")
    comma_separated_list = delimitedList(
        Optional( quotedString.copy() | _commasepitem, default="")
    ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
5671

5672

5673
if __name__ == "__main__":
    # self-demo: build a tiny SELECT grammar and exercise runTests on it and on
    # a few of the pyparsing_common expressions

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into a single token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = (selectToken("command")
                 + columnSpec("columns")
                 + fromToken
                 + tableNameList("tables"))

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # the same samples are run through 'number' (int or float results, as
    # appropriate) and then 'fnumber' (every result returned as float)
    numericSamples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """
    for numericExpr in (pyparsing_common.number, pyparsing_common.fnumber):
        numericExpr.runTests(numericSamples)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    # convert matched UUID text into uuid.UUID objects before running the tests
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
5743

5744
Product

Resources

Company