CoCalc -- pyparsing.py

GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/pip/_vendor/pyparsing.py
⁸¹¹ views
1
# -*- coding: utf-8 -*-
2
# module pyparsing.py
3
#
4
# Copyright (c) 2003-2019  Paul T. McGuire
5
#
6
# Permission is hereby granted, free of charge, to any person obtaining
7
# a copy of this software and associated documentation files (the
8
# "Software"), to deal in the Software without restriction, including
9
# without limitation the rights to use, copy, modify, merge, publish,
10
# distribute, sublicense, and/or sell copies of the Software, and to
11
# permit persons to whom the Software is furnished to do so, subject to
12
# the following conditions:
13
#
14
# The above copyright notice and this permission notice shall be
15
# included in all copies or substantial portions of the Software.
16
#
17
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
#
25

26
__doc__ = \
27
"""
28
pyparsing module - Classes and methods to define and execute parsing grammars
29
=============================================================================
30

31
The pyparsing module is an alternative approach to creating and
32
executing simple grammars, vs. the traditional lex/yacc approach, or the
33
use of regular expressions.  With pyparsing, you don't need to learn
34
a new syntax for defining grammars or matching expressions - the parsing
35
module provides a library of classes that you use to construct the
36
grammar directly in Python.
37

38
Here is a program to parse "Hello, World!" (or any greeting of the form
39
``"<salutation>, <addressee>!"``), built up using :class:`Word`,
40
:class:`Literal`, and :class:`And` elements
41
(the :class:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
42
and the strings are auto-converted to :class:`Literal` expressions)::
43

44
    from pip._vendor.pyparsing import Word, alphas
45

46
    # define grammar of a greeting
47
    greet = Word(alphas) + "," + Word(alphas) + "!"
48

49
    hello = "Hello, World!"
50
    print (hello, "->", greet.parseString(hello))
51

52
The program outputs the following::
53

54
    Hello, World! -> ['Hello', ',', 'World', '!']
55

56
The Python representation of the grammar is quite readable, owing to the
57
self-explanatory class names, and the use of '+', '|' and '^' operators.
58

59
The :class:`ParseResults` object returned from
60
:class:`ParserElement.parseString` can be
61
accessed as a nested list, a dictionary, or an object with named
62
attributes.
63

64
The pyparsing module handles some of the problems that are typically
65
vexing when writing text parsers:
66

67
  - extra or missing whitespace (the above program will also handle
68
    "Hello,World!", "Hello  ,  World  !", etc.)
69
  - quoted strings
70
  - embedded comments
71

72

73
Getting Started -
74
-----------------
75
Visit the classes :class:`ParserElement` and :class:`ParseResults` to
76
see the base classes that most other pyparsing
77
classes inherit from. Use the docstrings for examples of how to:
78

79
 - construct literal match expressions from :class:`Literal` and
80
   :class:`CaselessLiteral` classes
81
 - construct character word-group expressions using the :class:`Word`
82
   class
83
 - see how to create repetitive expressions using :class:`ZeroOrMore`
84
   and :class:`OneOrMore` classes
85
 - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
86
   and :class:`'&'<Each>` operators to combine simple expressions into
87
   more complex ones
88
 - associate names with your parsed results using
89
   :class:`ParserElement.setResultsName`
90
 - access the parsed data, which is returned as a :class:`ParseResults`
91
   object
92
 - find some helpful expression short-cuts like :class:`delimitedList`
93
   and :class:`oneOf`
94
 - find more useful common expressions in the :class:`pyparsing_common`
95
   namespace class
96
"""
97

98
__version__ = "2.4.7"
99
__versionTime__ = "30 Mar 2020 00:43 UTC"
100
__author__ = "Paul McGuire <[email protected]>"
101

102
import string
103
from weakref import ref as wkref
104
import copy
105
import sys
106
import warnings
107
import re
108
import sre_constants
109
import collections
110
import pprint
111
import traceback
112
import types
113
from datetime import datetime
114
from operator import itemgetter
115
import itertools
116
from functools import wraps
117
from contextlib import contextmanager
118

119
try:
120
    # Python 3
121
    from itertools import filterfalse
122
except ImportError:
123
    from itertools import ifilterfalse as filterfalse
124

125
try:
126
    from _thread import RLock
127
except ImportError:
128
    from threading import RLock
129

130
try:
131
    # Python 3
132
    from collections.abc import Iterable
133
    from collections.abc import MutableMapping, Mapping
134
except ImportError:
135
    # Python 2.7
136
    from collections import Iterable
137
    from collections import MutableMapping, Mapping
138

139
try:
140
    from collections import OrderedDict as _OrderedDict
141
except ImportError:
142
    try:
143
        from ordereddict import OrderedDict as _OrderedDict
144
    except ImportError:
145
        _OrderedDict = None
146

147
try:
148
    from types import SimpleNamespace
149
except ImportError:
150
    class SimpleNamespace: pass
151

152
# version compatibility configuration
153
__compat__ = SimpleNamespace()
154
__compat__.__doc__ = """
155
    A cross-version compatibility configuration for pyparsing features that will be
156
    released in a future version. By setting values in this configuration to True,
157
    those features can be enabled in prior versions for compatibility development
158
    and testing.
159

160
     - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
161
       of results names when an And expression is nested within an Or or MatchFirst; set to
162
       True to enable bugfix released in pyparsing 2.3.0, or False to preserve
163
       pre-2.3.0 handling of named results
164
"""
165
__compat__.collect_all_And_tokens = True
166

167
# Diagnostic switches; every flag below starts out disabled.
__diag__ = SimpleNamespace()
__diag__.__doc__ = """
Diagnostic configuration (all default to False)
     - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
       name is defined on a MatchFirst or Or expression with one or more And subexpressions
       (only warns if __compat__.collect_all_And_tokens is False)
     - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
       name is defined on a containing expression with ungrouped subexpressions that also
       have results names
     - warn_name_set_on_empty_Forward - flag to enable warnings when a Forward is defined
       with a results name, but has no contents defined
     - warn_on_multiple_string_args_to_oneof - flag to enable warnings when oneOf is
       incorrectly called with multiple str arguments
     - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
       calls to ParserElement.setName()
"""
__diag__.warn_multiple_tokens_in_named_alternation = False
__diag__.warn_ungrouped_named_tokens_in_collection = False
__diag__.warn_name_set_on_empty_Forward = False
__diag__.warn_on_multiple_string_args_to_oneof = False
__diag__.enable_debug_on_named_expressions = False
# Snapshot of the flag names. It is taken before enable_all_warnings is attached
# below, so that helper (whose name also starts with "enable_") is not listed.
__diag__._all_names = [nm for nm in vars(__diag__) if nm.startswith(("enable_", "warn_"))]

def _enable_all_warnings():
    """Switch on every ``warn_*`` diagnostic flag; ``enable_*`` flags are not touched."""
    __diag__.warn_multiple_tokens_in_named_alternation = True
    __diag__.warn_ungrouped_named_tokens_in_collection = True
    __diag__.warn_name_set_on_empty_Forward = True
    __diag__.warn_on_multiple_string_args_to_oneof = True
__diag__.enable_all_warnings = _enable_all_warnings
196

197

198
__all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__',
199
           'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
200
           'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
201
           'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
202
           'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
203
           'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
204
           'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
205
           'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
206
           'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
207
           'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
208
           'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
209
           'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
210
           'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
211
           'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
212
           'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
213
           'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
214
           'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
215
           'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass',
216
           'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
217
           'conditionAsParseAction', 're',
218
           ]
219

220
# Interpreter version as a (major, minor, micro) tuple.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if not PY_3:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Unicode-tolerant stand-in for ``str(obj)``.

        A ``unicode`` object is returned unchanged.  Otherwise ``str(obj)`` is
        returned; if that raises ``UnicodeEncodeError``, ``unicode(obj)`` is
        encoded with the default encoding using ``xmlcharrefreplace`` and every
        ``&#NNN;`` reference in the result is rewritten as ``\\u`` followed by
        the hex form of ``NNN``.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # When this succeeds the result is exactly what str(obj) gives,
            # so existing callers see no difference.
            return str(obj)

        except UnicodeEncodeError:
            # Fall back to an encoded form with escaped code points
            encoded = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            xmlcharref = Regex(r'&#\d+;')
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(encoded)

    # single-argument builtins usable as parse actions; names missing from
    # this interpreter's __builtin__ module are skipped
    import __builtin__

    singleArgBuiltins = [
        getattr(__builtin__, fname)
        for fname in "sum len sorted reversed list tuple set any all min max".split()
        if hasattr(__builtin__, fname)
    ]

else:
    _MAX_INT = sys.maxsize
    # Python 2 names aliased onto their Python 3 equivalents
    basestring = unicode = _ustr = str
    unichr = chr

    # single-argument builtins that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

# type object of a generator expression
_generatorType = type(y for y in range(1))
269

270
def _xml_escape(data):
    """Return ``data`` with ``&``, ``>``, ``<``, ``"`` and ``'`` replaced by XML entities."""
    # '&' comes first so that the ampersands introduced by the later
    # substitutions are not themselves escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for symbol, entity in replacements:
        data = data.replace(symbol, entity)
    return data
279

280
# Commonly used character sets, expressed as plain strings.
nums = "0123456789"
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
# every printable character that is not whitespace
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))
# a single backslash character
_bslash = chr(92)
286

287

288
def conditionAsParseAction(fn, message=None, fatal=False):
    """Wrap the condition function ``fn`` as a parse action that raises on a falsy result.

    Parameters:

     - fn - condition callable; it is passed through ``_trim_arity`` and the
       result is invoked as ``fn(s, l, t)``
     - message (default= ``None``) - text for the raised exception; when
       ``None``, ``"failed user-defined condition"`` is used
     - fatal (default= ``False``) - raise :class:`ParseFatalException` instead
       of :class:`ParseException`

    Returns the wrapping parse action, which returns ``None`` when the
    condition holds.
    """
    if message is None:
        message = "failed user-defined condition"
    if fatal:
        failure_type = ParseFatalException
    else:
        failure_type = ParseException
    condition = _trim_arity(fn)

    @wraps(condition)
    def pa(s, l, t):
        if condition(s, l, t):
            return
        raise failure_type(s, l, message)

    return pa
299

300
class ParseBaseException(Exception):
301
    """base exception class for all parsing runtime exceptions"""
302
    # Performance tuning: we construct a *lot* of these, so keep this
303
    # constructor as small and fast as possible
304
    def __init__(self, pstr, loc=0, msg=None, elem=None):
305
        self.loc = loc
306
        if msg is None:
307
            self.msg = pstr
308
            self.pstr = ""
309
        else:
310
            self.msg = msg
311
            self.pstr = pstr
312
        self.parserElement = elem
313
        self.args = (pstr, loc, msg)
314

315
    @classmethod
316
    def _from_exception(cls, pe):
317
        """
318
        internal factory method to simplify creating one type of ParseException
319
        from another - avoids having __init__ signature conflicts among subclasses
320
        """
321
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
322

323
    def __getattr__(self, aname):
324
        """supported attributes by name are:
325
           - lineno - returns the line number of the exception text
326
           - col - returns the column number of the exception text
327
           - line - returns the line containing the exception text
328
        """
329
        if aname == "lineno":
330
            return lineno(self.loc, self.pstr)
331
        elif aname in ("col", "column"):
332
            return col(self.loc, self.pstr)
333
        elif aname == "line":
334
            return line(self.loc, self.pstr)
335
        else:
336
            raise AttributeError(aname)
337

338
    def __str__(self):
339
        if self.pstr:
340
            if self.loc >= len(self.pstr):
341
                foundstr = ', found end of text'
342
            else:
343
                foundstr = (', found %r' % self.pstr[self.loc:self.loc + 1]).replace(r'\\', '\\')
344
        else:
345
            foundstr = ''
346
        return ("%s%s  (at char %d), (line:%d, col:%d)" %
347
                   (self.msg, foundstr, self.loc, self.lineno, self.column))
348
    def __repr__(self):
349
        return _ustr(self)
350
    def markInputline(self, markerString=">!<"):
351
        """Extracts the exception line from the input string, and marks
352
           the location of the exception with a special symbol.
353
        """
354
        line_str = self.line
355
        line_column = self.column - 1
356
        if markerString:
357
            line_str = "".join((line_str[:line_column],
358
                                markerString, line_str[line_column:]))
359
        return line_str.strip()
360
    def __dir__(self):
361
        return "lineno col line".split() + dir(type(self))
362

363
class ParseException(ParseBaseException):
    """
    Exception thrown when parse expressions don't match class;
    supported attributes by name are:
    - lineno - returns the line number of the exception text
    - col - returns the column number of the exception text
    - line - returns the line containing the exception text

    Example::

        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::

        Expected integer (at char 0), (line:1, col:1)
        column: 1

    """

    @staticmethod
    def explain(exc, depth=16):
        """
        Method to take an exception and translate the Python internal traceback into a list
        of the pyparsing expressions that caused the exception to be raised.

        Parameters:

         - exc - exception raised during parsing (need not be a ParseException, in support
           of Python exceptions that might be raised in a parse action)
         - depth (default=16) - number of levels back in the stack trace to list expression
           and function names; if None, the full stack trace names will be listed; if 0, only
           the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.

        Note: the diagnostic output will include string representations of the expressions
        that failed to parse. These representations will be more helpful if you use `setName` to
        give identifiable names to your expressions. Otherwise they will use the default string
        forms, which may be cryptic to read.

        explain() is only supported under Python 3.
        """
        import inspect

        if depth is None:
            depth = sys.getrecursionlimit()
        ret = []
        if isinstance(exc, ParseBaseException):
            # input line followed by a caret under the failing column
            ret.append(exc.line)
            ret.append(' ' * (exc.col - 1) + '^')
        ret.append("{0}: {1}".format(type(exc).__name__, exc))

        if depth > 0:
            # Only the frame objects are used below, so no source-context
            # lines are requested from inspect.
            callers = inspect.getinnerframes(exc.__traceback__, context=0)
            seen = set()
            for ff in callers[-depth:]:
                frm = ff[0]

                f_self = frm.f_locals.get('self', None)
                if isinstance(f_self, ParserElement):
                    # report each parser element once, and only from its parse frames
                    if frm.f_code.co_name not in ('parseImpl', '_parseNoCache'):
                        continue
                    if f_self in seen:
                        continue
                    seen.add(f_self)

                    self_type = type(f_self)
                    ret.append("{0}.{1} - {2}".format(self_type.__module__,
                                                      self_type.__name__,
                                                      f_self))
                elif f_self is not None:
                    # a method of some other kind of object: list its type only
                    self_type = type(f_self)
                    ret.append("{0}.{1}".format(self_type.__module__,
                                                self_type.__name__))
                else:
                    # a plain function: list its name, except for these two
                    code = frm.f_code
                    if code.co_name in ('wrapper', '<module>'):
                        continue

                    ret.append("{0}".format(code.co_name))

                # skipped frames (the `continue`s above) do not use up depth
                depth -= 1
                if not depth:
                    break

        return '\n'.join(ret)
454

455

456
class ParseFatalException(ParseBaseException):
    """Exception for user code to raise when inconsistent parse content is
       found; stops all parsing immediately"""
460

461
class ParseSyntaxException(ParseFatalException):
    """Same as :class:`ParseFatalException`, except that it is thrown
    internally, when an :class:`ErrorStop<And._ErrorStop>` ('-' operator)
    signals that an unbacktrackable syntax error has been found and parsing
    is to stop immediately.
    """
468

469
#~ class ReparseException(ParseBaseException):
470
    #~ """Experimental class - parse actions can raise this exception to cause
471
       #~ pyparsing to reparse the input string:
472
        #~ - with a modified input string, and/or
473
        #~ - with a modified start location
474
       #~ Set the values of the ReparseException in the constructor, and raise the
475
       #~ exception in a parse action to cause pyparsing to use the new string/location.
476
       #~ Setting the values as None causes no change to be made.
477
       #~ """
478
    #~ def __init_( self, newstring, restartLoc ):
479
        #~ self.newParseText = newstring
480
        #~ self.reparseLoc = restartLoc
481

482
class RecursiveGrammarException(Exception):
    """exception thrown by :class:`ParserElement.validate` if the
    grammar could be improperly recursive

    The constructor argument is kept as ``parseElementTrace`` and is shown
    in the string form of the exception.
    """
    def __init__(self, parseElementList):
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: a bare tuple operand would be unpacked
        # by '%' as the format arguments and fail or print the wrong thing.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
491

492
class _ParseResultsWithOffset(object):
    """Holds a value together with an offset, stored as the 2-tuple ``tup``.

    Indexing is delegated to the tuple: ``[0]`` is the value, ``[1]`` the offset.
    """
    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        return self.tup[i]

    def __repr__(self):
        # only the value takes part in the repr
        value = self.tup[0]
        return repr(value)

    def setOffset(self, i):
        """Replace the offset with ``i``, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)
501

502
class ParseResults(object):
503
    """Structured parse results, to provide multiple means of access to
504
    the parsed data:
505

506
       - as a list (``len(results)``)
507
       - by list index (``results[0], results[1]``, etc.)
508
       - by attribute (``results.<resultsName>`` - see :class:`ParserElement.setResultsName`)
509

510
    Example::
511

512
        integer = Word(nums)
513
        date_str = (integer.setResultsName("year") + '/'
514
                        + integer.setResultsName("month") + '/'
515
                        + integer.setResultsName("day"))
516
        # equivalent form:
517
        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
518

519
        # parseString returns a ParseResults object
520
        result = date_str.parseString("1999/12/31")
521

522
        def test(s, fn=repr):
523
            print("%s -> %s" % (s, fn(eval(s))))
524
        test("list(result)")
525
        test("result[0]")
526
        test("result['month']")
527
        test("result.day")
528
        test("'month' in result")
529
        test("'minutes' in result")
530
        test("result.dump()", str)
531

532
    prints::
533

534
        list(result) -> ['1999', '/', '12', '/', '31']
535
        result[0] -> '1999'
536
        result['month'] -> '12'
537
        result.day -> '31'
538
        'month' in result -> True
539
        'minutes' in result -> False
540
        result.dump() -> ['1999', '/', '12', '/', '31']
541
        - day: 31
542
        - month: 12
543
        - year: 1999
544
    """
545
    def __new__(cls, toklist=None, name=None, asList=True, modal=True):
546
        if isinstance(toklist, cls):
547
            return toklist
548
        retobj = object.__new__(cls)
549
        retobj.__doinit = True
550
        return retobj
551

552
    # Performance tuning: we construct a *lot* of these, so keep this
553
    # constructor as small and fast as possible
554
    def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
        """Set up the token list and, when ``name`` is truthy, record the
        tokens under that results name.

        The base state is only built while the ``__doinit`` flag set by
        ``__new__`` is still true.  ``isinstance`` is bound as a default
        argument, so the calls below use that local name.
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                # private copy of the caller's list
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                # anything else is stored as a single token
                self.__toklist = [toklist]
            self.__tokdict = {}

        if not name:
            return

        if not modal:
            self.__accumNames[name] = 0
        if isinstance(name, int):
            name = _ustr(name)  # will always return a str, but use _ustr for consistency
        self.__name = name

        # nothing is stored under the name for None, '' or []
        if isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', []):
            return

        if isinstance(toklist, basestring):
            toklist = [toklist]

        if not asList:
            try:
                self[name] = toklist[0]
            except (KeyError, TypeError, IndexError):
                # not indexable that way: store the object itself
                self[name] = toklist
            return

        if isinstance(toklist, ParseResults):
            named = ParseResults(toklist.__toklist)
        else:
            named = ParseResults(toklist[0])
        self[name] = _ParseResultsWithOffset(named, 0)
        self[name].__name = name
592

593
    def __getitem__(self, i):
594
        if isinstance(i, (int, slice)):
595
            return self.__toklist[i]
596
        else:
597
            if i not in self.__accumNames:
598
                return self.__tokdict[i][-1][0]
599
            else:
600
                return ParseResults([v[0] for v in self.__tokdict[i]])
601

602
    def __setitem__(self, k, v, isinstance=isinstance):
        """Store ``v`` at position ``k`` of the token list, or under results name ``k``.

        A ``_ParseResultsWithOffset`` value is always filed under ``k`` as a
        named entry; otherwise an ``int``/``slice`` key assigns into the token
        list and any other key files ``v`` as a named entry with offset 0.
        A stored ``ParseResults`` gets a weak reference back to ``self``.
        """
        if isinstance(v, _ParseResultsWithOffset):
            entry, sub = v, v[0]
        elif isinstance(k, (int, slice)):
            self.__toklist[k] = v
            entry, sub = None, v
        else:
            entry, sub = _ParseResultsWithOffset(v, 0), v

        if entry is not None:
            # concatenation makes a new list; a list already stored under k
            # is left unmodified
            self.__tokdict[k] = self.__tokdict.get(k, []) + [entry]

        if isinstance(sub, ParseResults):
            sub.__parent = wkref(self)
614

615
    def __delitem__(self, i):
616
        if isinstance(i, (int, slice)):
617
            mylen = len(self.__toklist)
618
            del self.__toklist[i]
619

620
            # convert int to slice
621
            if isinstance(i, int):
622
                if i < 0:
623
                    i += mylen
624
                i = slice(i, i + 1)
625
            # get removed indices
626
            removed = list(range(*i.indices(mylen)))
627
            removed.reverse()
628
            # fixup indices in token dictionary
629
            for name, occurrences in self.__tokdict.items():
630
                for j in removed:
631
                    for k, (value, position) in enumerate(occurrences):
632
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
633
        else:
634
            del self.__tokdict[i]
635

636
    def __contains__(self, k):
637
        return k in self.__tokdict
638

639
    def __len__(self):
640
        return len(self.__toklist)
641

642
    def __bool__(self):
643
        return (not not self.__toklist)
644
    __nonzero__ = __bool__
645

646
    def __iter__(self):
647
        return iter(self.__toklist)
648

649
    def __reversed__(self):
650
        return iter(self.__toklist[::-1])
651

652
    def _iterkeys(self):
653
        if hasattr(self.__tokdict, "iterkeys"):
654
            return self.__tokdict.iterkeys()
655
        else:
656
            return iter(self.__tokdict)
657

658
    def _itervalues(self):
659
        return (self[k] for k in self._iterkeys())
660

661
    def _iteritems(self):
662
        return ((k, self[k]) for k in self._iterkeys())
663

664
    if PY_3:
        # Python 3: the public accessors are the iterator helpers themselves.
        keys = _iterkeys        # iterator of all named result keys
        values = _itervalues    # iterator of all named result values
        items = _iteritems      # iterator of all named result key-value tuples

    else:
        # Python 2: the iterator helpers are published under iter* names, and
        # keys()/values()/items() return lists built from them.
        iterkeys = _iterkeys        # iterator of all named result keys
        itervalues = _itervalues    # iterator of all named result values
        iteritems = _iteritems      # iterator of all named result key-value tuples

        def keys(self):
            """List of all named result keys (Python 2.x form; an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values(self):
            """List of all named result values (Python 2.x form; an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items(self):
            """List of all named result key-value tuples (Python 2.x form; an iterator in Python 3.x)."""
            return list(self.iteritems())
695

696
    def haskeys(self):
697
        """Since keys() returns an iterator, this method is helpful in bypassing
698
           code that looks for the existence of any defined results names."""
699
        return bool(self.__tokdict)
700

701
    def pop(self, *args, **kwargs):
702
        """
703
        Removes and returns item at specified index (default= ``last``).
704
        Supports both ``list`` and ``dict`` semantics for ``pop()``. If
705
        passed no argument or an integer argument, it will use ``list``
706
        semantics and pop tokens from the list of parsed tokens. If passed
707
        a non-integer argument (most likely a string), it will use ``dict``
708
        semantics and pop the corresponding value from any defined results
709
        names. A second default return value argument is supported, just as in
710
        ``dict.pop()``.
711

712
        Example::
713

714
            def remove_first(tokens):
715
                tokens.pop(0)
716
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
717
            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
718

719
            label = Word(alphas)
720
            patt = label("LABEL") + OneOrMore(Word(nums))
721
            print(patt.parseString("AAB 123 321").dump())
722

723
            # Use pop() in a parse action to remove named result (note that corresponding value is not
724
            # removed from list form of results)
725
            def remove_LABEL(tokens):
726
                tokens.pop("LABEL")
727
                return tokens
728
            patt.addParseAction(remove_LABEL)
729
            print(patt.parseString("AAB 123 321").dump())
730

731
        prints::
732

733
            ['AAB', '123', '321']
734
            - LABEL: AAB
735

736
            ['AAB', '123', '321']
737
        """
738
        if not args:
739
            args = [-1]
740
        for k, v in kwargs.items():
741
            if k == 'default':
742
                args = (args[0], v)
743
            else:
744
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
745
        if (isinstance(args[0], int)
746
                or len(args) == 1
747
                or args[0] in self):
748
            index = args[0]
749
            ret = self[index]
750
            del self[index]
751
            return ret
752
        else:
753
            defaultvalue = args[1]
754
            return defaultvalue
755

756
    def get(self, key, defaultValue=None):
        """
        Look up the named result stored under ``key``; when no such
        results name exists, hand back ``defaultValue`` instead (``None``
        unless the caller supplies one).

        Mirrors ``dict.get()``.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString("1999/12/31")
            print(result.get("year")) # -> '1999'
            print(result.get("hour", "not specified")) # -> 'not specified'
            print(result.get("hour")) # -> None
        """
        return self[key] if key in self else defaultValue
778

779
    def insert(self, index, insStr):
        """
        Inserts new element at location index in the list of parsed tokens.

        Similar to ``list.insert()``. The offsets recorded for named results
        are adjusted so that every name keeps pointing at the token it was
        attached to: a token that was at or after the insertion point moves
        one slot to the right, tokens before it stay where they are.

        Parameters:
         - index - position at which to insert; negative values and values
           past the end are interpreted exactly as ``list.insert()`` does
         - insStr - the element to insert

        Example::

            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to insert the parse location in the front of the parsed results
            def insert_locn(locn, tokens):
                tokens.insert(0, locn)
            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
        """
        # Work out the slot list.insert() will actually use, so the offset
        # fixup below is also right for negative or out-of-range indexes.
        size = len(self.__toklist)
        if index < 0:
            effective = max(size + index, 0)
        else:
            effective = min(index, size)

        self.__toklist.insert(index, insStr)

        # fixup indices in token dictionary: the element previously located at
        # the insertion point is displaced too, hence ">=" rather than ">"
        for occurrences in self.__tokdict.values():
            for k, (value, position) in enumerate(occurrences):
                if position >= effective:
                    occurrences[k] = _ParseResultsWithOffset(value, position + 1)
799

800
    def append(self, item):
        """
        Append one element to the end of the parsed-token list.

        Example::

            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to compute the sum of the parsed integers, and add it to the end
            def append_sum(tokens):
                tokens.append(sum(map(int, tokens)))
            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
        """
        tokens = self.__toklist
        tokens.append(item)
814

815
    def extend(self, itemseq):
        """
        Append every element of ``itemseq`` to the end of the parsed-token
        list. When ``itemseq`` is itself a :class:`ParseResults` it is merged
        with ``+=`` semantics; any other sequence is added to the token list
        only.

        Example::

            patt = OneOrMore(Word(alphas))

            # use a parse action to append the reverse of the matched strings, to make a palindrome
            def make_palindrome(tokens):
                tokens.extend(reversed([t[::-1] for t in tokens]))
                return ''.join(tokens)
            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
        """
        if not isinstance(itemseq, ParseResults):
            self.__toklist.extend(itemseq)
            return
        self.__iadd__(itemseq)
833

834
    def clear(self):
        """
        Remove every token and every results name, in place.
        """
        self.__tokdict.clear()
        del self.__toklist[:]
840

841
    def __getattr__(self, name):
842
        try:
843
            return self[name]
844
        except KeyError:
845
            return ""
846

847
    def __add__(self, other):
        """
        Return a new object holding ``self`` followed by ``other``; built by
        copying ``self`` and applying ``+=`` to the copy.
        """
        combined = self.copy()
        combined += other
        return combined
851

852
    def __iadd__(self, other):
        """
        Merge ``other`` into this object in place and return ``self``.

        Named results of ``other`` are re-registered here with their offsets
        moved past the tokens already present, nested :class:`ParseResults`
        values get a weak reference to ``self`` as parent, and the token list
        and accumulated names of ``other`` are appended/merged.
        """
        if other.__tokdict:
            base = len(self.__toklist)

            def shifted(pos):
                # negative offsets are mapped to the first appended slot
                return base if pos < 0 else pos + base

            # Build the complete list first, then assign: assignment mutates
            # self's name dictionary.
            relocated = []
            for key, entries in other.__tokdict.items():
                for entry in entries:
                    relocated.append((key, _ParseResultsWithOffset(entry[0], shifted(entry[1]))))

            for key, entry in relocated:
                self[key] = entry
                if isinstance(entry[0], ParseResults):
                    entry[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update(other.__accumNames)
        return self
867

868
    def __radd__(self, other):
        """
        Reflected addition. An integer ``0`` on the left returns a copy of
        ``self`` (this is what makes the ``sum()`` builtin usable for merging
        many results); anything else is evaluated as ``other + self``.
        """
        # useful for merging many ParseResults using sum() builtin
        if isinstance(other, int) and other == 0:
            return self.copy()
        # this may raise a TypeError - so be it
        return other + self
875

876
    def __repr__(self):
        """Return ``(<repr of token list>, <repr of names dictionary>)``."""
        tokens_repr = repr(self.__toklist)
        names_repr = repr(self.__tokdict)
        return "(%s, %s)" % (tokens_repr, names_repr)
878

879
    def __str__(self):
        """
        Render the token list in list-like form: nested :class:`ParseResults`
        are converted with ``_ustr``, every other token with ``repr``.
        """
        rendered = []
        for tok in self.__toklist:
            rendered.append(_ustr(tok) if isinstance(tok, ParseResults) else repr(tok))
        return '[' + ', '.join(rendered) + ']'
881

882
    def _asStringList(self, sep=''):
        """
        Flatten the tokens into a list of strings. A non-empty ``sep`` is
        placed before each top-level token once the output is non-empty.
        Nested :class:`ParseResults` are flattened recursively (without a
        separator); other tokens are converted with ``_ustr``.
        """
        pieces = []
        for tok in self.__toklist:
            if sep and pieces:
                pieces.append(sep)
            if not isinstance(tok, ParseResults):
                pieces.append(_ustr(tok))
            else:
                pieces += tok._asStringList()
        return pieces
892

893
    def asList(self):
        """
        Return the parsed tokens as a plain (possibly nested) ``list``.
        Nested :class:`ParseResults` are converted recursively; all other
        tokens are placed in the list unchanged.

        Example::

            patt = OneOrMore(Word(alphas))
            result = patt.parseString("sldkj lsdkj sldkj")
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

            # Use asList() to create an actual list
            result_list = result.asList()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
        plain = []
        for tok in self.__toklist:
            if isinstance(tok, ParseResults):
                tok = tok.asList()
            plain.append(tok)
        return plain
909

910
    def asDict(self):
        """
        Return the named results as a plain ``dict``. Values that are
        :class:`ParseResults` become nested dicts when they define names of
        their own, and lists otherwise.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

            result_dict = result.asDict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
        """
        def to_plain(obj):
            # leave ordinary values alone; unwrap ParseResults recursively
            if not isinstance(obj, ParseResults):
                return obj
            if obj.haskeys():
                return obj.asDict()
            return [to_plain(member) for member in obj]

        pairs = self.items() if PY_3 else self.iteritems()
        return dict((name, to_plain(value)) for name, value in pairs)
945

946
    def copy(self):
        """
        Build and return a new :class:`ParseResults` from this one's token
        list, carrying over its name, parent reference, accumulated names and
        a new dictionary of its named results.
        """
        duplicate = ParseResults(self.__toklist)
        duplicate.__name = self.__name
        duplicate.__parent = self.__parent
        duplicate.__tokdict = dict(self.__tokdict)
        duplicate.__accumNames.update(self.__accumNames)
        return duplicate
956

957
    def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
        """
        (Deprecated) Render the parse results as an XML string. Tokens and
        nested results that carry a results name are tagged with that name;
        everything else is tagged ``ITEM`` (or omitted when
        ``namedItemsOnly`` is true).

        Parameters:
         - doctag - tag for the enclosing element; defaults to this object's
           own results name, falling back to ``ITEM``
         - namedItemsOnly - skip tokens that have no results name
         - indent - prefix written before the enclosing element
         - formatted - when false, no newlines or indentation are emitted
        """
        # token position -> results name registered at that position
        namedItems = {}
        for name, occurrences in self.__tokdict.items():
            for occurrence in occurrences:
                namedItems[occurrence[1]] = name

        nl = "\n"
        childIndent = indent + "  "
        if not formatted:
            # collapse out indents if formatting is not desired
            nl = indent = childIndent = ""

        if doctag is not None:
            selfTag = doctag
        elif self.__name:
            selfTag = self.__name
        else:
            selfTag = None

        if not selfTag:
            if namedItemsOnly:
                return ""
            selfTag = "ITEM"

        # value passed down as namedItemsOnly to nested results
        nestedNamedOnly = namedItemsOnly and doctag is None

        out = [nl, indent, "<", selfTag, ">"]
        for i, res in enumerate(self.__toklist):
            resTag = namedItems.get(i)
            if isinstance(res, ParseResults):
                out.append(res.asXML(resTag, nestedNamedOnly, childIndent, formatted))
                continue

            # individual token: unnamed ones are skipped or given a generic tag
            if not resTag:
                if namedItemsOnly:
                    continue
                resTag = "ITEM"
            out += [nl, childIndent, "<", resTag, ">",
                    _xml_escape(_ustr(res)),
                    "</", resTag, ">"]

        out += [nl, indent, "</", selfTag, ">"]
        return "".join(out)
1017

1018
    def __lookup(self, sub):
        """
        Return the first results name under which the very object ``sub``
        (compared by identity) is stored, or ``None`` if there is none.
        """
        for name, occurrences in self.__tokdict.items():
            if any(sub is value for value, _loc in occurrences):
                return name
        return None
1024

1025
    def getName(self):
        r"""
        Return the results name associated with this token expression, or
        ``None`` when none can be determined. Handy when any of several
        alternative expressions may have matched at a given location.

        The name is taken, in order, from: this object's own name; the name
        its (still alive) parent has registered for it; or, for a
        single-token result with exactly one name whose first offset is
        0 or -1, that name.

        Example::

            integer = Word(nums)
            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number")
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = OneOrMore(user_data)

            result = user_info.parseString("22 111-22-3333 #221B")
            for item in result:
                print(item.getName(), ':', item[0])

        prints::

            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self.__name:
            return self.__name

        if self.__parent:
            par = self.__parent()
            return par.__lookup(self) if par else None

        if len(self) == 1 and len(self.__tokdict) == 1:
            name, occurrences = next(iter(self.__tokdict.items()))
            if occurrences[0][1] in (0, -1):
                return name
        return None
1064

1065
    def dump(self, indent='', full=True, include_list=True, _depth=0):
        """
        Diagnostic listing of the contents of a :class:`ParseResults`,
        returned as a string.

        Parameters:
         - indent - prefix for each emitted line, so the text can be embedded
           in a nested display of other data
         - full - when true, named results (or, failing that, nested
           results by index) are listed below the token list
         - include_list - when true, the listing starts with the token list
         - _depth - nesting level used for indentation by recursive calls

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(result.dump())

        prints::

            ['12', '/', '31', '/', '1999']
            - day: 1999
            - month: 31
            - year: 12
        """
        out = [indent + _ustr(self.asList())] if include_list else ['']
        if not full:
            return "".join(out)

        if self.haskeys():
            for k, v in sorted((str(k), v) for k, v in self.items()):
                # out already holds the header entry, so a newline always precedes each name
                out.append('\n')
                out.append("%s%s- %s: " % (indent, ('  ' * _depth), k))
                if not isinstance(v, ParseResults):
                    out.append(repr(v))
                elif v:
                    out.append(v.dump(indent=indent, full=full, include_list=include_list, _depth=_depth + 1))
                else:
                    out.append(_ustr(v))
        elif any(isinstance(vv, ParseResults) for vv in self):
            for i, vv in enumerate(self):
                if isinstance(vv, ParseResults):
                    body = vv.dump(indent=indent,
                                   full=full,
                                   include_list=include_list,
                                   _depth=_depth + 1)
                else:
                    body = _ustr(vv)
                out.append("\n%s%s[%d]:\n%s%s%s" % (indent,
                                                    ('  ' * (_depth)),
                                                    i,
                                                    indent,
                                                    ('  ' * (_depth + 1)),
                                                    body))

        return "".join(out)
1129

1130
    def pprint(self, *args, **kwargs):
1131
        """
1132
        Pretty-printer for parsed results as a list, using the
1133
        `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
1134
        Accepts additional positional or keyword args as defined for
1135
        `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .
1136

1137
        Example::
1138

1139
            ident = Word(alphas, alphanums)
1140
            num = Word(nums)
1141
            func = Forward()
1142
            term = ident | num | Group('(' + func + ')')
1143
            func <<= ident + Group(Optional(delimitedList(term)))
1144
            result = func.parseString("fna a,b,(fnb c,d,200),100")
1145
            result.pprint(width=40)
1146

1147
        prints::
1148

1149
            ['fna',
1150
             ['a',
1151
              'b',
1152
              ['(', 'fnb', ['c', 'd', '200'], ')'],
1153
              '100']]
1154
        """
1155
        pprint.pprint(self.asList(), *args, **kwargs)
1156

1157
    # add support for pickle protocol
1158
    def __getstate__(self):
        """
        Pickle support: return ``(token list, (copy of names dictionary,
        parent, accumulated names, name))``. The parent is stored as the
        referenced object itself rather than as a weak reference; a missing,
        dead or falsy parent is stored as ``None``.
        """
        parent = self.__parent() if self.__parent is not None else None
        if not parent:
            parent = None
        details = (self.__tokdict.copy(), parent, self.__accumNames, self.__name)
        return (self.__toklist, details)
1164

1165
    def __setstate__(self, state):
        """
        Pickle support: restore the fields from a ``state`` tuple shaped like
        the one ``__getstate__`` returns. The accumulated names are copied
        into a fresh dict, and a non-``None`` parent is wrapped in a weak
        reference again.
        """
        toklist = state[0]
        tokdict, par, inAccumNames, name = state[1]

        self.__toklist = toklist
        self.__tokdict = tokdict
        self.__name = name
        self.__accumNames = dict(inAccumNames)
        self.__parent = wkref(par) if par is not None else None
1174

1175
    def __getnewargs__(self):
        """Pickle support: return the tuple ``(token list, name, asList flag, modal flag)``."""
        return (self.__toklist, self.__name, self.__asList, self.__modal)
1177

1178
    def __dir__(self):
        """List the attributes of the object's type followed by its results names."""
        names = dir(type(self))
        names.extend(self.keys())
        return names
1180

1181
    @classmethod
    def from_dict(cls, other, name=None):
        """
        Alternate constructor: build a ParseResults from the mapping
        ``other``, turning each key/value pair into a named result. Values
        that are themselves mappings are converted recursively. When ``name``
        is given, the result is wrapped in an outer ParseResults under that
        name.
        """
        def is_iterable(obj):
            # strings are iterable but are treated as scalar values here
            try:
                iter(obj)
            except Exception:
                return False
            string_types = (str, bytes) if PY_3 else basestring
            return not isinstance(obj, string_types)

        ret = cls([])
        for k, v in other.items():
            if isinstance(v, Mapping):
                piece = cls.from_dict(v, name=k)
            else:
                piece = cls([v], name=k, asList=is_iterable(v))
            ret += piece

        if name is None:
            return ret
        return cls([ret], name=name)
1208

1209
# Register ParseResults with MutableMapping (presumably the collections ABC
# imported earlier in this file - confirm) without inheriting from it.
MutableMapping.register(ParseResults)
1210

1211
def col(loc, strg):
    """Return the 1-based column of position ``loc`` within ``strg``, where
    newline characters separate the lines.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See
    :class:`ParserElement.parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    text = strg
    # shortcut: the character right after a newline is in column 1
    if 0 < loc < len(text) and text[loc - 1] == '\n':
        return 1
    return loc - text.rfind("\n", 0, loc)
1224

1225
def lineno(loc, strg):
    """Return the 1-based line number of position ``loc`` within ``strg``:
    one more than the number of newline characters found before ``loc``.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See :class:`ParserElement.parseString`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
1236

1237
def line(loc, strg):
    """Return the text of the line containing position ``loc`` in ``strg``,
    without its terminating newline; newline characters separate the lines.
    """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    return strg[start:end] if end >= 0 else strg[start:]
1246

1247
def _defaultStartDebugAction(instring, loc, expr):
    """Default debug hook: print the expression about to be matched and the
    location, followed by its (line, column) pair."""
    position = "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    print("Match " + _ustr(expr) + " at loc " + _ustr(loc) + position)
1249

1250
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Default debug hook: print the matched expression and its tokens in list form."""
    matched_tokens = str(toks.asList())
    print("Matched " + _ustr(expr) + " -> " + matched_tokens)
1252

1253
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Default debug hook: print the exception that was raised."""
    message = "Exception raised:" + _ustr(exc)
    print(message)
1255

1256
def nullDebugAction(*args):
    """No-op debug action; accepts any positional arguments and ignores them.
    Use it to suppress debugging output during parsing."""
    return None
1259

1260
# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
1261
#~ 'decorator to trim function calls to match the arity of the target'
1262
#~ def _trim_arity(func, maxargs=3):
1263
    #~ if func in singleArgBuiltins:
1264
        #~ return lambda s,l,t: func(t)
1265
    #~ limit = 0
1266
    #~ foundArity = False
1267
    #~ def wrapper(*args):
1268
        #~ nonlocal limit,foundArity
1269
        #~ while 1:
1270
            #~ try:
1271
                #~ ret = func(*args[limit:])
1272
                #~ foundArity = True
1273
                #~ return ret
1274
            #~ except TypeError:
1275
                #~ if limit == maxargs or foundArity:
1276
                    #~ raise
1277
                #~ limit += 1
1278
                #~ continue
1279
    #~ return wrapper
1280

1281
# this version is Python 2.x-3.x cross-compatible
1282
'decorator to trim function calls to match the arity of the target'
1283
def _trim_arity(func, maxargs=2):
    """Wrap ``func`` so it can be called with more positional arguments than it accepts.

    The returned wrapper first calls ``func`` with all the arguments it was
    given; each time that call fails with a ``TypeError`` attributed to the
    call itself, one more leading argument is dropped and the call is
    retried. The number of dropped arguments is remembered across calls.

    Parameters:
     - func - the callable to wrap; members of ``singleArgBuiltins`` are
       simply wrapped to receive the third argument only
     - maxargs - leading arguments keep being dropped while the current drop
       count is ``<= maxargs``

    Returns the wrapper, whose ``__name__`` is set from ``func``.

    A ``TypeError`` is re-raised instead of retried once a call has already
    succeeded, or when the innermost traceback frame is not the line of the
    ``func(...)`` call inside the wrapper.
    """
    if func in singleArgBuiltins:
        return lambda s, l, t: func(t)
    # one-element lists act as mutable cells shared with the nested wrapper
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3, 5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3, 5, 0) else -2
            frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # compare the innermost (file, line) with the synthesized call site
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local reference to the traceback object
                        try:
                            del tb
                        except NameError:
                            pass

                # retry with one more leading argument removed
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1349

1350

1351
class ParserElement(object):
1352
    """Abstract base level parser element class."""
1353
    DEFAULT_WHITE_CHARS = " \n\t\r"
1354
    verbose_stacktrace = False
1355

1356
    @staticmethod
1357
    def setDefaultWhitespaceChars(chars):
1358
        r"""
1359
        Overrides the default whitespace chars
1360

1361
        Example::
1362

1363
            # default whitespace chars are space, <TAB> and newline
1364
            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
1365

1366
            # change to just treat newline as significant
1367
            ParserElement.setDefaultWhitespaceChars(" \t")
1368
            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
1369
        """
1370
        ParserElement.DEFAULT_WHITE_CHARS = chars
1371

1372
    @staticmethod
1373
    def inlineLiteralsUsing(cls):
1374
        """
1375
        Set class to be used for inclusion of string literals into a parser.
1376

1377
        Example::
1378

1379
            # default literal class used is Literal
1380
            integer = Word(nums)
1381
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1382

1383
            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
1384

1385

1386
            # change to Suppress
1387
            ParserElement.inlineLiteralsUsing(Suppress)
1388
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1389

1390
            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
1391
        """
1392
        ParserElement._literalStringClass = cls
1393

1394
    @classmethod
1395
    def _trim_traceback(cls, tb):
1396
        while tb.tb_next:
1397
            tb = tb.tb_next
1398
        return tb
1399

1400
    def __init__(self, savelist=False):
1401
        self.parseAction = list()
1402
        self.failAction = None
1403
        # ~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
1404
        self.strRepr = None
1405
        self.resultsName = None
1406
        self.saveAsList = savelist
1407
        self.skipWhitespace = True
1408
        self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
1409
        self.copyDefaultWhiteChars = True
1410
        self.mayReturnEmpty = False # used when checking for left-recursion
1411
        self.keepTabs = False
1412
        self.ignoreExprs = list()
1413
        self.debug = False
1414
        self.streamlined = False
1415
        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
1416
        self.errmsg = ""
1417
        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
1418
        self.debugActions = (None, None, None)  # custom debug actions
1419
        self.re = None
1420
        self.callPreparse = True # used to avoid redundant calls to preParse
1421
        self.callDuringTry = False
1422

1423
    def copy(self):
1424
        """
1425
        Make a copy of this :class:`ParserElement`.  Useful for defining
1426
        different parse actions for the same parsing pattern, using copies of
1427
        the original parse element.
1428

1429
        Example::
1430

1431
            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1432
            integerK = integer.copy().addParseAction(lambda toks: toks[0] * 1024) + Suppress("K")
1433
            integerM = integer.copy().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
1434

1435
            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1436

1437
        prints::
1438

1439
            [5120, 100, 655360, 268435456]
1440

1441
        Equivalent form of ``expr.copy()`` is just ``expr()``::
1442

1443
            integerM = integer().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
1444
        """
1445
        cpy = copy.copy(self)
1446
        cpy.parseAction = self.parseAction[:]
1447
        cpy.ignoreExprs = self.ignoreExprs[:]
1448
        if self.copyDefaultWhiteChars:
1449
            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1450
        return cpy
1451

1452
    def setName(self, name):
        """
        Give this expression a name, which makes debugging output and
        exception messages clearer. The error message becomes
        ``"Expected <name>"``. Returns ``self``.

        Example::

            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
        """
        self.name = name
        self.errmsg = "Expected " + self.name
        # optional diagnostic: switch debugging on for every named expression
        if __diag__.enable_debug_on_named_expressions:
            self.setDebug()
        return self
1466

1467
    def setResultsName(self, name, listAllMatches=False):
        """
        Attach a results name, under which the matching tokens can be
        referenced as a nested attribute of the returned parse results.
        NOTE: this returns a *copy* of the original :class:`ParserElement` object;
        this is so that the client can define a basic element, such as an
        integer, and reference it in multiple places with different names.

        The abbreviated syntax ``expr("name")`` can be used in place of
        ``expr.setResultsName("name")`` - see :class:`__call__`.

        Example::

            date_str = (integer.setResultsName("year") + '/'
                        + integer.setResultsName("month") + '/'
                        + integer.setResultsName("day"))

            # equivalent form:
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
        """
        named_copy = self._setResultsName(name, listAllMatches)
        return named_copy
1489

1490
    def _setResultsName(self, name, listAllMatches=False):
        """
        Return a copy of this element carrying ``name`` as its results name.
        A trailing ``*`` on ``name`` is stripped and forces
        ``listAllMatches``; the copy's ``modalResults`` flag is the negation
        of ``listAllMatches``.
        """
        newself = self.copy()
        if name.endswith("*"):
            name, listAllMatches = name[:-1], True
        newself.modalResults = not listAllMatches
        newself.resultsName = name
        return newself
1498

1499
    def setBreak(self, breakFlag=True):
        """Enable or disable a Python pdb breakpoint that fires when this
           element is about to be parsed. With ``breakFlag`` true, ``_parse``
           is wrapped in a function that enters the debugger first; with
           ``breakFlag`` false, a previously wrapped ``_parse`` is restored.
           Returns ``self``.
        """
        if not breakFlag:
            if hasattr(self._parse, "_originalParseMethod"):
                self._parse = self._parse._originalParseMethod
            return self

        _parseMethod = self._parse

        def breaker(instring, loc, doActions=True, callPreParse=True):
            import pdb
            # this call to pdb.set_trace() is intentional, not a checkin error
            pdb.set_trace()
            return _parseMethod(instring, loc, doActions, callPreParse)

        # remember the wrapped method so the breakpoint can be removed again
        breaker._originalParseMethod = _parseMethod
        self._parse = breaker
        return self
1517

1518
    def setParseAction(self, *fns, **kwargs):
        """
        Replace this expression's parse actions with the given callables.

        Each parse action is called after a successful match as one of
        ``fn(s, loc, toks)``, ``fn(loc, toks)``, ``fn(toks)`` or ``fn()``, where:

        - s    = the original string being parsed (see note below)
        - loc  = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object

        A parse action may return a value to replace the matched tokens; if it
        returns nothing the tokens are left as they are.

        Passing ``None`` as the only argument clears all parse actions that
        were previously attached to this expression.

        Optional keyword arguments:
        - callDuringTry = (default= ``False``) indicate if parse action should be run during lookaheads and alternate testing

        Raises ``TypeError`` if any of ``fns`` is not callable.

        Note: the default parsing behavior is to expand tabs in the input string
        before starting the parsing process.  See :class:`parseString` for more
        information on parsing strings containing ``<TAB>`` s, and suggested
        methods to maintain a consistent view of the parsed string, the parse
        location, and line and column positions within the parsed string.

        Example::

            integer = Word(nums)
            date_str = integer + '/' + integer + '/' + integer

            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

            # use parse action to convert to ints at parse time
            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
            date_str = integer + '/' + integer + '/' + integer

            # note that integer fields are now ints, not strings
            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
        """
        if list(fns) == [None]:
            # explicit reset request: drop every attached action
            self.parseAction = []
            return self

        for candidate in fns:
            if not callable(candidate):
                raise TypeError("parse actions must be callable")
        # normalize every action to the full (s, loc, toks) calling convention
        self.parseAction = [_trim_arity(fn) for fn in fns]
        self.callDuringTry = kwargs.get("callDuringTry", False)
        return self
1566

1567
    def addParseAction(self, *fns, **kwargs):
        """
        Append one or more parse actions to those already attached to this
        expression.  See :class:`setParseAction` for the accepted call signatures.

        Optional keyword arguments:
        - callDuringTry = (default= ``False``) once enabled for this expression
          it stays enabled; passing ``False`` here does not switch it off

        See examples in :class:`copy`.
        """
        during_try = kwargs.get("callDuringTry", False)
        self.parseAction += [_trim_arity(fn) for fn in fns]
        self.callDuringTry = self.callDuringTry or during_try
        return self
1576

1577
    def addCondition(self, *fns, **kwargs):
        """Append boolean predicate functions to this expression's parse actions.

        Each function in ``fns`` is wrapped with ``conditionAsParseAction`` and
        appended to ``self.parseAction``.  See :class:`setParseAction` for the
        accepted call signatures.  Unlike ``setParseAction``, functions passed
        to ``addCondition`` need to return boolean success/fail of the condition.

        Optional keyword arguments:
        - message = define a custom message to be used in the raised exception
        - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
        - callDuringTry = (default= ``False``) once enabled for this expression
          it stays enabled; passing ``False`` here does not switch it off

        Example::

            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
            year_int = integer.copy()
            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
            date_str = year_int + '/' + integer + '/' + integer

            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
        """
        failure_message = kwargs.get('message')
        is_fatal = kwargs.get('fatal', False)
        for predicate in fns:
            wrapped = conditionAsParseAction(predicate, message=failure_message, fatal=is_fatal)
            self.parseAction.append(wrapped)

        during_try = kwargs.get("callDuringTry", False)
        self.callDuringTry = self.callDuringTry or during_try
        return self
1601

1602
    def setFailAction(self, fn):
        """Register the action to run when parsing fails at this expression.

           The fail action ``fn`` is called as ``fn(s, loc, expr, err)`` where:
           - s = string being parsed
           - loc = location where expression match was attempted and failed
           - expr = the parse expression that failed
           - err = the exception thrown

           The function returns no value.  It may throw :class:`ParseFatalException`
           if it is desired to stop parsing immediately.

           Returns ``self`` so calls can be chained.
        """
        setattr(self, "failAction", fn)
        return self
1614

1615
    def _skipIgnorables(self, instring, loc):
1616
        exprsFound = True
1617
        while exprsFound:
1618
            exprsFound = False
1619
            for e in self.ignoreExprs:
1620
                try:
1621
                    while 1:
1622
                        loc, dummy = e._parse(instring, loc)
1623
                        exprsFound = True
1624
                except ParseException:
1625
                    pass
1626
        return loc
1627

1628
    def preParse(self, instring, loc):
        """
        Return the location at which this element should start matching.

        Ignorable expressions are skipped first (when ``self.ignoreExprs`` is
        non-empty); then, if ``self.skipWhitespace`` is set, any run of
        characters from ``self.whiteChars`` is skipped as well.
        """
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        if not self.skipWhitespace:
            return loc

        whitespace = self.whiteChars
        end = len(instring)
        while loc < end and instring[loc] in whitespace:
            loc += 1
        return loc
1639

1640
    def parseImpl(self, instring, loc, doActions=True):
        """
        Base matching implementation: consume nothing and produce no tokens.

        Returns ``(loc, [])`` with ``loc`` unchanged.
        """
        no_tokens = []
        return loc, no_tokens
1642

1643
    def postParse(self, instring, loc, tokenlist):
        """
        Hook applied to the tokens returned by ``parseImpl``.

        This base implementation hands ``tokenlist`` back unchanged (the very
        same object); ``instring`` and ``loc`` are not used here.
        """
        result = tokenlist
        return result
1645

1646
    # ~ @profile
1647
    def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
1648
        TRY, MATCH, FAIL = 0, 1, 2
1649
        debugging = (self.debug)  # and doActions)
1650

1651
        if debugging or self.failAction:
1652
            # ~ print ("Match", self, "at loc", loc, "(%d, %d)" % (lineno(loc, instring), col(loc, instring)))
1653
            if self.debugActions[TRY]:
1654
                self.debugActions[TRY](instring, loc, self)
1655
            try:
1656
                if callPreParse and self.callPreparse:
1657
                    preloc = self.preParse(instring, loc)
1658
                else:
1659
                    preloc = loc
1660
                tokensStart = preloc
1661
                if self.mayIndexError or preloc >= len(instring):
1662
                    try:
1663
                        loc, tokens = self.parseImpl(instring, preloc, doActions)
1664
                    except IndexError:
1665
                        raise ParseException(instring, len(instring), self.errmsg, self)
1666
                else:
1667
                    loc, tokens = self.parseImpl(instring, preloc, doActions)
1668
            except Exception as err:
1669
                # ~ print ("Exception raised:", err)
1670
                if self.debugActions[FAIL]:
1671
                    self.debugActions[FAIL](instring, tokensStart, self, err)
1672
                if self.failAction:
1673
                    self.failAction(instring, tokensStart, self, err)
1674
                raise
1675
        else:
1676
            if callPreParse and self.callPreparse:
1677
                preloc = self.preParse(instring, loc)
1678
            else:
1679
                preloc = loc
1680
            tokensStart = preloc
1681
            if self.mayIndexError or preloc >= len(instring):
1682
                try:
1683
                    loc, tokens = self.parseImpl(instring, preloc, doActions)
1684
                except IndexError:
1685
                    raise ParseException(instring, len(instring), self.errmsg, self)
1686
            else:
1687
                loc, tokens = self.parseImpl(instring, preloc, doActions)
1688

1689
        tokens = self.postParse(instring, loc, tokens)
1690

1691
        retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
1692
        if self.parseAction and (doActions or self.callDuringTry):
1693
            if debugging:
1694
                try:
1695
                    for fn in self.parseAction:
1696
                        try:
1697
                            tokens = fn(instring, tokensStart, retTokens)
1698
                        except IndexError as parse_action_exc:
1699
                            exc = ParseException("exception raised in parse action")
1700
                            exc.__cause__ = parse_action_exc
1701
                            raise exc
1702

1703
                        if tokens is not None and tokens is not retTokens:
1704
                            retTokens = ParseResults(tokens,
1705
                                                      self.resultsName,
1706
                                                      asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
1707
                                                      modal=self.modalResults)
1708
                except Exception as err:
1709
                    # ~ print "Exception raised in user parse action:", err
1710
                    if self.debugActions[FAIL]:
1711
                        self.debugActions[FAIL](instring, tokensStart, self, err)
1712
                    raise
1713
            else:
1714
                for fn in self.parseAction:
1715
                    try:
1716
                        tokens = fn(instring, tokensStart, retTokens)
1717
                    except IndexError as parse_action_exc:
1718
                        exc = ParseException("exception raised in parse action")
1719
                        exc.__cause__ = parse_action_exc
1720
                        raise exc
1721

1722
                    if tokens is not None and tokens is not retTokens:
1723
                        retTokens = ParseResults(tokens,
1724
                                                  self.resultsName,
1725
                                                  asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
1726
                                                  modal=self.modalResults)
1727
        if debugging:
1728
            # ~ print ("Matched", self, "->", retTokens.asList())
1729
            if self.debugActions[MATCH]:
1730
                self.debugActions[MATCH](instring, tokensStart, loc, self, retTokens)
1731

1732
        return loc, retTokens
1733

1734
    def tryParse(self, instring, loc):
        """
        Attempt a match at ``loc`` without running parse actions
        (``doActions=False``) and return the location following the match.

        A ``ParseFatalException`` raised by the attempt is converted into a
        plain ``ParseException`` at ``loc``.
        """
        try:
            outcome = self._parse(instring, loc, doActions=False)
        except ParseFatalException:
            raise ParseException(instring, loc, self.errmsg, self)
        else:
            return outcome[0]
1739

1740
    def canParseNext(self, instring, loc):
        """
        Return ``True`` if this element matches at ``loc``, ``False`` if the
        attempt raises ``ParseException`` or ``IndexError``.
        """
        try:
            self.tryParse(instring, loc)
        except (ParseException, IndexError):
            return False
        return True
1747

1748
    class _UnboundedCache(object):
1749
        def __init__(self):
1750
            cache = {}
1751
            self.not_in_cache = not_in_cache = object()
1752

1753
            def get(self, key):
1754
                return cache.get(key, not_in_cache)
1755

1756
            def set(self, key, value):
1757
                cache[key] = value
1758

1759
            def clear(self):
1760
                cache.clear()
1761

1762
            def cache_len(self):
1763
                return len(cache)
1764

1765
            self.get = types.MethodType(get, self)
1766
            self.set = types.MethodType(set, self)
1767
            self.clear = types.MethodType(clear, self)
1768
            self.__len__ = types.MethodType(cache_len, self)
1769

1770
    if _OrderedDict is not None:
1771
        class _FifoCache(object):
1772
            def __init__(self, size):
1773
                self.not_in_cache = not_in_cache = object()
1774

1775
                cache = _OrderedDict()
1776

1777
                def get(self, key):
1778
                    return cache.get(key, not_in_cache)
1779

1780
                def set(self, key, value):
1781
                    cache[key] = value
1782
                    while len(cache) > size:
1783
                        try:
1784
                            cache.popitem(False)
1785
                        except KeyError:
1786
                            pass
1787

1788
                def clear(self):
1789
                    cache.clear()
1790

1791
                def cache_len(self):
1792
                    return len(cache)
1793

1794
                self.get = types.MethodType(get, self)
1795
                self.set = types.MethodType(set, self)
1796
                self.clear = types.MethodType(clear, self)
1797
                self.__len__ = types.MethodType(cache_len, self)
1798

1799
    else:
1800
        class _FifoCache(object):
1801
            def __init__(self, size):
1802
                self.not_in_cache = not_in_cache = object()
1803

1804
                cache = {}
1805
                key_fifo = collections.deque([], size)
1806

1807
                def get(self, key):
1808
                    return cache.get(key, not_in_cache)
1809

1810
                def set(self, key, value):
1811
                    cache[key] = value
1812
                    while len(key_fifo) > size:
1813
                        cache.pop(key_fifo.popleft(), None)
1814
                    key_fifo.append(key)
1815

1816
                def clear(self):
1817
                    cache.clear()
1818
                    key_fifo.clear()
1819

1820
                def cache_len(self):
1821
                    return len(cache)
1822

1823
                self.get = types.MethodType(get, self)
1824
                self.set = types.MethodType(set, self)
1825
                self.clear = types.MethodType(clear, self)
1826
                self.__len__ = types.MethodType(cache_len, self)
1827

1828
    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    # placeholder dict so that resetCache() can call .clear() before packrat parsing is
    # enabled; enablePackrat() replaces it with an _UnboundedCache or _FifoCache instance
    packrat_cache = {}
    # re-entrant lock held by _parseCache around every cache lookup and update
    packrat_cache_lock = RLock()
    # [hits, misses] counters, indexed by the HIT/MISS constants in _parseCache
    packrat_cache_stats = [0, 0]
1832

1833
    # this method gets repeatedly called during backtracking with the same arguments -
1834
    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
1835
    def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
        """
        Memoizing wrapper around ``_parseNoCache``.

        Results are cached under the key
        ``(self, instring, loc, callPreParse, doActions)``.  Both successful
        results and ``ParseBaseException`` failures are stored; a cached
        failure is re-raised on a later hit.  Successful results are stored
        and returned as copies of the ``ParseResults``.  The whole lookup runs
        under ``ParserElement.packrat_cache_lock`` and updates the hit/miss
        counters in ``ParserElement.packrat_cache_stats``.
        """
        HIT, MISS = 0, 1
        key = (self, instring, loc, callPreParse, doActions)
        with ParserElement.packrat_cache_lock:
            memo = ParserElement.packrat_cache
            cached = memo.get(key)

            if cached is not memo.not_in_cache:
                ParserElement.packrat_cache_stats[HIT] += 1
                if isinstance(cached, Exception):
                    raise cached
                return cached[0], cached[1].copy()

            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                fresh = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                memo.set(key, pe.__class__(*pe.args))
                raise
            memo.set(key, (fresh[0], fresh[1].copy()))
            return fresh
1857

1858
    # the default parse entry point is the uncached implementation;
    # enablePackrat() rebinds ParserElement._parse to _parseCache
    _parse = _parseNoCache
1859

1860
    @staticmethod
    def resetCache():
        """
        Empty the packrat cache and zero its hit/miss counters.

        The counter list is reset in place, so references to it stay valid.
        """
        ParserElement.packrat_cache.clear()
        counters = ParserElement.packrat_cache_stats
        counters[:] = [0] * len(counters)
1864

1865
    _packratEnabled = False
    @staticmethod
    def enablePackrat(cache_size_limit=128):
        """Turn on "packrat" parsing, which memoizes parse attempts.

           Repeated parse attempts at the same string location (which happens
           often in many complex grammars) can then return a cached value
           immediately instead of re-executing parsing/validating code.  Both
           valid results and parsing exceptions are memoized.

           Parameters:

           - cache_size_limit - (default= ``128``) - if an integer value is provided
             will limit the size of the packrat cache; if None is passed, then
             the cache size will be unbounded; if 0 is passed, the cache will
             be effectively disabled.

           Only the first call has any effect; once packrat parsing is enabled,
           later calls return without changing the cache.

           This speedup may break existing programs that use parse actions that
           have side-effects.  For this reason, packrat parsing is disabled when
           you first import pyparsing.  To activate the packrat feature, your
           program must call the class method :class:`ParserElement.enablePackrat`.
           For best results, call ``enablePackrat()`` immediately after
           importing pyparsing.

           Example::

               from pip._vendor import pyparsing
               pyparsing.ParserElement.enablePackrat()
        """
        if ParserElement._packratEnabled:
            return

        ParserElement._packratEnabled = True
        if cache_size_limit is None:
            chosen_cache = ParserElement._UnboundedCache()
        else:
            chosen_cache = ParserElement._FifoCache(cache_size_limit)
        ParserElement.packrat_cache = chosen_cache
        # from now on every element parses through the memoizing wrapper
        ParserElement._parse = ParserElement._parseCache
1900

1901
    def parseString(self, instring, parseAll=False):
        """
        Parse ``instring`` from its beginning with this expression.

        This is the main interface to the client code, once the complete
        expression has been built.

        Returns the parsed data as a :class:`ParseResults` object, which may be
        accessed as a list, or as a dict or object with attributes if the given parser
        includes results names.

        If you want the grammar to require that the entire input string be
        successfully parsed, then set ``parseAll`` to True (equivalent to ending
        the grammar with ``StringEnd()``).

        Note: ``parseString`` implicitly calls ``expandtabs()`` on the input string
        (unless ``keepTabs`` is set), in order to report proper column numbers
        in parse actions.  If the input string contains tabs and the grammar
        uses parse actions that use the ``loc`` argument to index into the
        string being parsed, you can ensure you have a consistent view of the
        input string by:

        - calling ``parseWithTabs`` on your grammar before calling ``parseString``
          (see :class:`parseWithTabs`)
        - define your parse action using the full ``(s, loc, toks)`` signature, and
          reference the input string using the parse action's ``s`` argument
        - explicitly expand the tabs in your input string before calling
          ``parseString``

        Example::

            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
        """
        ParserElement.resetCache()
        if not self.streamlined:
            self.streamline()
        for ignorable in self.ignoreExprs:
            ignorable.streamline()
        if not self.keepTabs:
            instring = instring.expandtabs()

        try:
            loc, tokens = self._parse(instring, 0)
            if parseAll:
                # skip trailing whitespace/ignorables, then insist on end of input
                loc = self.preParse(instring, loc)
                end_of_input = Empty() + StringEnd()
                end_of_input._parse(instring, loc)
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            if getattr(exc, '__traceback__', None) is not None:
                exc.__traceback__ = self._trim_traceback(exc.__traceback__)
            raise exc
        return tokens
1958

1959
    def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
        """
        Generator that scans ``instring`` for matches of this expression.

        Yields a ``(tokens, start, end)`` tuple for every match.  May be called
        with optional ``maxMatches`` argument, to clip scanning after 'n'
        matches are found.  If ``overlap`` is specified, then overlapping
        matches will be reported.

        Note that the start and end locations are reported relative to the string
        being parsed.  See :class:`parseString` for more information on parsing
        strings with embedded tabs.

        Example::

            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
            print(source)
            for tokens, start, end in Word(alphas).scanString(source):
                print(' '*start + '^'*(end-start))
                print(' '*start + tokens[0])

        prints::

            sldjf123lsdjjkf345sldkjf879lkjsfd987
            ^^^^^
            sldjf
                    ^^^^^^^
                    lsdjjkf
                              ^^^^^^
                              sldkjf
                                       ^^^^^^
                                       lkjsfd
        """
        if not self.streamlined:
            self.streamline()
        for ignorable in self.ignoreExprs:
            ignorable.streamline()

        if not self.keepTabs:
            instring = _ustr(instring).expandtabs()
        instrlen = len(instring)
        loc = 0
        preparseFn = self.preParse
        parseFn = self._parse
        ParserElement.resetCache()
        matches = 0
        try:
            while loc <= instrlen and matches < maxMatches:
                try:
                    preloc = preparseFn(instring, loc)
                    nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
                except ParseException:
                    # no match here: retry one character past the pre-parsed position
                    loc = preloc + 1
                    continue

                if nextLoc <= loc:
                    # match did not advance past the current location: step forward
                    loc = preloc + 1
                    continue

                matches += 1
                yield tokens, preloc, nextLoc
                if not overlap:
                    loc = nextLoc
                elif preparseFn(instring, loc) > loc:
                    loc = nextLoc
                else:
                    loc += 1
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            if getattr(exc, '__traceback__', None) is not None:
                exc.__traceback__ = self._trim_traceback(exc.__traceback__)
            raise exc
2032

2033
    def transformString(self, instring):
        """
        Extension to :class:`scanString` that rewrites the matched parts of
        ``instring`` with the tokens produced for them, typically tokens that a
        parse action has modified.  To use ``transformString``, define a grammar
        and attach a parse action to it that modifies the returned token list.
        Text between matches is copied through unchanged, and the resulting
        transformed string is returned.

        Note: this sets ``self.keepTabs = True`` on the expression.

        Example::

            wd = Word(alphas)
            wd.setParseAction(lambda toks: toks[0].title())

            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))

        prints::

            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
        """
        pieces = []
        prev_end = 0
        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
        # keep string locs straight between transformString and scanString
        self.keepTabs = True
        try:
            for toks, start, end in self.scanString(instring):
                # untouched text between the previous match and this one
                pieces.append(instring[prev_end:start])
                if toks:
                    if isinstance(toks, ParseResults):
                        pieces += toks.asList()
                    elif isinstance(toks, list):
                        pieces += toks
                    else:
                        pieces.append(toks)
                prev_end = end
            pieces.append(instring[prev_end:])
            non_empty = [piece for piece in pieces if piece]
            return "".join(map(_ustr, _flatten(non_empty)))
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            if getattr(exc, '__traceback__', None) is not None:
                exc.__traceback__ = self._trim_traceback(exc.__traceback__)
            raise exc
2080

2081
    def searchString(self, instring, maxMatches=_MAX_INT):
        """
        Another extension to :class:`scanString`: collect just the tokens of
        every match into a single :class:`ParseResults`, discarding the match
        locations.  May be called with optional ``maxMatches`` argument, to
        clip searching after 'n' matches are found.

        Example::

            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
            cap_word = Word(alphas.upper(), alphas.lower())

            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

            # the sum() builtin can be used to merge results into a single ParseResults object
            print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))

        prints::

            [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
            ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
        """
        try:
            found = [tokens for tokens, _start, _end in self.scanString(instring, maxMatches)]
            return ParseResults(found)
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            if getattr(exc, '__traceback__', None) is not None:
                exc.__traceback__ = self._trim_traceback(exc.__traceback__)
            raise exc
2112

2113
    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
        """
        Generator method to split a string using the given expression as a separator.

        Parameters:

        - instring - the string to split
        - maxsplit - (default= no limit) maximum number of separator matches
          to split on; it is passed to :class:`scanString` as ``maxMatches``
        - includeSeparators - (default= ``False``) if true, the first token of
          each separator match is yielded between the surrounding pieces

        Yields the text before each separator match and, last, the text after
        the final match (an empty string if the input ends with a separator).

        Example::

            punc = oneOf(list(".,;:/-!?"))
            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

        prints::

            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
        """
        last = 0
        for tokens, start, end in self.scanString(instring, maxMatches=maxsplit):
            yield instring[last:start]
            if includeSeparators:
                yield tokens[0]
            last = end
        # whatever follows the final separator
        yield instring[last:]
2137

2138
    def __add__(self, other):
        """
        Implementation of + operator - returns :class:`And`. Adding strings to a ParserElement
        converts them to :class:`Literal`s by default.

        If ``other`` is neither a string nor a ParserElement, a
        ``SyntaxWarning`` is issued and ``None`` is returned.

        Example::

            greet = Word(alphas) + "," + Word(alphas) + "!"
            hello = "Hello, World!"
            print (hello, "->", greet.parseString(hello))

        prints::

            Hello, World! -> ['Hello', ',', 'World', '!']

        ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.

            Literal('start') + ... + Literal('end')

        is equivalent to:

            Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

        Note that the skipped text is returned with '_skipped' as a results name,
        and to support having multiple skips in the same parser, the value returned is
        a list of all skipped text.
        """
        if other is Ellipsis:
            return _PendingSkip(self)

        rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
        if isinstance(rhs, ParserElement):
            return And([self, rhs])

        warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                      SyntaxWarning, stacklevel=2)
        return None
2175

2176
    def __radd__(self, other):
        """
        Implementation of + operator when left operand is not a :class:`ParserElement`.

        ``... + expr`` becomes ``SkipTo(expr)("_skipped*") + expr``.  A string
        left operand is converted with ``_literalStringClass``.  For any other
        non-ParserElement operand a ``SyntaxWarning`` is issued and ``None``
        is returned.
        """
        if other is Ellipsis:
            return SkipTo(self)("_skipped*") + self

        lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
        if isinstance(lhs, ParserElement):
            return lhs + self

        warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                      SyntaxWarning, stacklevel=2)
        return None
2190

2191
    def __sub__(self, other):
        """
        Implementation of - operator, returns :class:`And` with error stop.

        A string right operand is converted with ``_literalStringClass``.  For
        any other non-ParserElement operand a ``SyntaxWarning`` is issued and
        ``None`` is returned.
        """
        rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
        if isinstance(rhs, ParserElement):
            return self + And._ErrorStop() + rhs

        warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                      SyntaxWarning, stacklevel=2)
        return None
2202

2203
    def __rsub__(self, other):
        """
        Implementation of - operator when left operand is not a :class:`ParserElement`.

        A string left operand is converted with ``_literalStringClass`` and the
        subtraction is then delegated to the converted operand.  Any other operand
        that is not a :class:`ParserElement` triggers a ``SyntaxWarning`` and
        ``None`` is returned.
        """
        lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
        if isinstance(lhs, ParserElement):
            return lhs - self

        warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                      SyntaxWarning, stacklevel=2)
        return None
2214

2215
    def __mul__(self, other):
2216
        """
2217
        Implementation of * operator, allows use of ``expr * 3`` in place of
2218
        ``expr + expr + expr``.  Expressions may also me multiplied by a 2-integer
2219
        tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
2220
        may also include ``None`` as in:
2221
         - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
2222
              to ``expr*n + ZeroOrMore(expr)``
2223
              (read as "at least n instances of ``expr``")
2224
         - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
2225
              (read as "0 to n instances of ``expr``")
2226
         - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
2227
         - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``
2228

2229
        Note that ``expr*(None, n)`` does not raise an exception if
2230
        more than n exprs exist in the input stream; that is,
2231
        ``expr*(None, n)`` does not enforce a maximum number of expr
2232
        occurrences.  If this behavior is desired, then write
2233
        ``expr*(None, n) + ~expr``
2234
        """
2235
        if other is Ellipsis:
2236
            other = (0, None)
2237
        elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
2238
            other = ((0, ) + other[1:] + (None,))[:2]
2239

2240
        if isinstance(other, int):
2241
            minElements, optElements = other, 0
2242
        elif isinstance(other, tuple):
2243
            other = tuple(o if o is not Ellipsis else None for o in other)
2244
            other = (other + (None, None))[:2]
2245
            if other[0] is None:
2246
                other = (0, other[1])
2247
            if isinstance(other[0], int) and other[1] is None:
2248
                if other[0] == 0:
2249
                    return ZeroOrMore(self)
2250
                if other[0] == 1:
2251
                    return OneOrMore(self)
2252
                else:
2253
                    return self * other[0] + ZeroOrMore(self)
2254
            elif isinstance(other[0], int) and isinstance(other[1], int):
2255
                minElements, optElements = other
2256
                optElements -= minElements
2257
            else:
2258
                raise TypeError("cannot multiply 'ParserElement' and ('%s', '%s') objects", type(other[0]), type(other[1]))
2259
        else:
2260
            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
2261

2262
        if minElements < 0:
2263
            raise ValueError("cannot multiply ParserElement by negative value")
2264
        if optElements < 0:
2265
            raise ValueError("second tuple value must be greater or equal to first tuple value")
2266
        if minElements == optElements == 0:
2267
            raise ValueError("cannot multiply ParserElement by 0 or (0, 0)")
2268

2269
        if optElements:
2270
            def makeOptionalList(n):
2271
                if n > 1:
2272
                    return Optional(self + makeOptionalList(n - 1))
2273
                else:
2274
                    return Optional(self)
2275
            if minElements:
2276
                if minElements == 1:
2277
                    ret = self + makeOptionalList(optElements)
2278
                else:
2279
                    ret = And([self] * minElements) + makeOptionalList(optElements)
2280
            else:
2281
                ret = makeOptionalList(optElements)
2282
        else:
2283
            if minElements == 1:
2284
                ret = self
2285
            else:
2286
                ret = And([self] * minElements)
2287
        return ret
2288

2289
    def __rmul__(self, other):
2290
        return self.__mul__(other)
2291

2292
    def __or__(self, other):
        """
        Implementation of | operator - returns :class:`MatchFirst`.

        ``expr | ...`` returns a :class:`_PendingSkip` placeholder created with
        ``must_skip=True``.  A string right operand is converted with
        ``_literalStringClass``; any other operand that is not a
        :class:`ParserElement` triggers a ``SyntaxWarning`` and ``None`` is returned.
        """
        if other is Ellipsis:
            return _PendingSkip(self, must_skip=True)

        alternative = self._literalStringClass(other) if isinstance(other, basestring) else other
        if isinstance(alternative, ParserElement):
            return MatchFirst([self, alternative])

        warnings.warn("Cannot combine element of type %s with ParserElement" % type(alternative),
                      SyntaxWarning, stacklevel=2)
        return None
2306

2307
    def __ror__(self, other):
        """
        Implementation of | operator when left operand is not a :class:`ParserElement`.

        A string left operand is converted with ``_literalStringClass`` and the
        operation is then delegated to the converted operand.  Any other operand
        that is not a :class:`ParserElement` triggers a ``SyntaxWarning`` and
        ``None`` is returned.
        """
        lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
        if isinstance(lhs, ParserElement):
            return lhs | self

        warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                      SyntaxWarning, stacklevel=2)
        return None
2318

2319
    def __xor__(self, other):
        """
        Implementation of ^ operator - returns :class:`Or`.

        A string right operand is converted with ``_literalStringClass``; any other
        operand that is not a :class:`ParserElement` triggers a ``SyntaxWarning``
        and ``None`` is returned.
        """
        rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
        if isinstance(rhs, ParserElement):
            return Or([self, rhs])

        warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                      SyntaxWarning, stacklevel=2)
        return None
2330

2331
    def __rxor__(self, other):
        """
        Implementation of ^ operator when left operand is not a :class:`ParserElement`.

        A string left operand is converted with ``_literalStringClass`` and the
        operation is then delegated to the converted operand.  Any other operand
        that is not a :class:`ParserElement` triggers a ``SyntaxWarning`` and
        ``None`` is returned.
        """
        lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
        if isinstance(lhs, ParserElement):
            return lhs ^ self

        warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                      SyntaxWarning, stacklevel=2)
        return None
2342

2343
    def __and__(self, other):
        """
        Implementation of & operator - returns :class:`Each`.

        A string right operand is converted with ``_literalStringClass``; any other
        operand that is not a :class:`ParserElement` triggers a ``SyntaxWarning``
        and ``None`` is returned.
        """
        rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
        if isinstance(rhs, ParserElement):
            return Each([self, rhs])

        warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                      SyntaxWarning, stacklevel=2)
        return None
2354

2355
    def __rand__(self, other):
        """
        Implementation of & operator when left operand is not a :class:`ParserElement`.

        A string left operand is converted with ``_literalStringClass`` and the
        operation is then delegated to the converted operand.  Any other operand
        that is not a :class:`ParserElement` triggers a ``SyntaxWarning`` and
        ``None`` is returned.
        """
        lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
        if isinstance(lhs, ParserElement):
            return lhs & self

        warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                      SyntaxWarning, stacklevel=2)
        return None
2366

2367
    def __invert__(self):
        """
        Implementation of ~ operator - wraps this expression in a :class:`NotAny`
        and returns it.
        """
        negated = NotAny(self)
        return negated
2372

2373
    def __iter__(self):
2374
        # must implement __iter__ to override legacy use of sequential access to __getitem__ to
2375
        # iterate over a sequence
2376
        raise TypeError('%r object is not iterable' % self.__class__.__name__)
2377

2378
    def __getitem__(self, key):
2379
        """
2380
        use ``[]`` indexing notation as a short form for expression repetition:
2381
         - ``expr[n]`` is equivalent to ``expr*n``
2382
         - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
2383
         - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
2384
              to ``expr*n + ZeroOrMore(expr)``
2385
              (read as "at least n instances of ``expr``")
2386
         - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
2387
              (read as "0 to n instances of ``expr``")
2388
         - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
2389
         - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
2390
         ``None`` may be used in place of ``...``.
2391

2392
        Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception
2393
        if more than ``n`` ``expr``s exist in the input stream.  If this behavior is
2394
        desired, then write ``expr[..., n] + ~expr``.
2395
       """
2396

2397
        # convert single arg keys to tuples
2398
        try:
2399
            if isinstance(key, str):
2400
                key = (key,)
2401
            iter(key)
2402
        except TypeError:
2403
            key = (key, key)
2404

2405
        if len(key) > 2:
2406
            warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5],
2407
                                                                                '... [{0}]'.format(len(key))
2408
                                                                                if len(key) > 5 else ''))
2409

2410
        # clip to 2 elements
2411
        ret = self * tuple(key[:2])
2412
        return ret
2413

2414
    def __call__(self, name=None):
2415
        """
2416
        Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.
2417

2418
        If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
2419
        passed as ``True``.
2420

2421
        If ``name` is omitted, same as calling :class:`copy`.
2422

2423
        Example::
2424

2425
            # these are equivalent
2426
            userdata = Word(alphas).setResultsName("name") + Word(nums + "-").setResultsName("socsecno")
2427
            userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
2428
        """
2429
        if name is not None:
2430
            return self._setResultsName(name)
2431
        else:
2432
            return self.copy()
2433

2434
    def suppress(self):
        """
        Returns this :class:`ParserElement` wrapped in a :class:`Suppress`, which
        suppresses its output; useful to keep punctuation from cluttering up
        returned output.
        """
        suppressed = Suppress(self)
        return suppressed
2440

2441
    def leaveWhitespace(self):
        """
        Turns off the skipping of whitespace before matching the characters in the
        :class:`ParserElement`'s defined pattern (sets ``skipWhitespace`` to ``False``).
        This is normally only used internally by the pyparsing module, but may be
        needed in some whitespace-sensitive grammars.

        Returns ``self`` so that calls can be chained.
        """
        self.skipWhitespace = False
        return self
2449

2450
    def setWhitespaceChars(self, chars):
        """
        Overrides the default whitespace chars with ``chars``.

        Also re-enables whitespace skipping and clears ``copyDefaultWhiteChars``.
        Returns ``self`` so that calls can be chained.
        """
        self.whiteChars = chars
        self.copyDefaultWhiteChars = False
        self.skipWhitespace = True
        return self
2458

2459
    def parseWithTabs(self):
        """
        Overrides default behavior to expand ``<TAB>``s to spaces before parsing the input string.
        Must be called before ``parseString`` when the input grammar contains elements that
        match ``<TAB>`` characters.

        Sets the ``keepTabs`` flag and returns ``self`` so that calls can be chained.
        """
        self.keepTabs = True
        return self
2467

2468
    def ignore(self, other):
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.

        A string argument is wrapped in a :class:`Suppress`.  A :class:`Suppress`
        is only added if it is not already in ``ignoreExprs``; any other expression
        is copied, wrapped in a :class:`Suppress`, and appended.  Returns ``self``.

        Example::

            patt = OneOrMore(Word(alphas))
            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

            patt.ignore(cStyleComment)
            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
        """
        if isinstance(other, basestring):
            other = Suppress(other)

        if not isinstance(other, Suppress):
            self.ignoreExprs.append(Suppress(other.copy()))
        elif other not in self.ignoreExprs:
            self.ignoreExprs.append(other)
        return self
2491

2492
    def setDebugActions(self, startAction, successAction, exceptionAction):
        """
        Enable display of debugging messages while doing pattern matching.

        Stores the three callbacks in ``debugActions``, substituting the module's
        default debug action for any argument that is falsy, then sets ``debug``
        to ``True``.  Returns ``self`` so that calls can be chained.
        """
        on_start = startAction or _defaultStartDebugAction
        on_success = successAction or _defaultSuccessDebugAction
        on_exception = exceptionAction or _defaultExceptionDebugAction
        self.debugActions = (on_start, on_success, on_exception)
        self.debug = True
        return self
2501

2502
    def setDebug(self, flag=True):
        """
        Enable display of debugging messages while doing pattern matching.
        Set ``flag`` to True to enable, False to disable.

        Example::

            wd = Word(alphas).setName("alphaword")
            integer = Word(nums).setName("numword")
            term = wd | integer

            # turn on debugging for wd
            wd.setDebug()

            OneOrMore(term).parseString("abc 123 xyz 890")

        prints::

            Match alphaword at loc 0(1,1)
            Matched alphaword -> ['abc']
            Match alphaword at loc 3(1,4)
            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
            Match alphaword at loc 7(1,8)
            Matched alphaword -> ['xyz']
            Match alphaword at loc 11(1,12)
            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
            Match alphaword at loc 15(1,16)
            Exception raised:Expected alphaword (at char 15), (line:1, col:16)

        The output shown is that produced by the default debug actions - custom debug actions can be
        specified using :class:`setDebugActions`. Prior to attempting
        to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
        is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
        message is shown. Also note the use of :class:`setName` to assign a human-readable name to the expression,
        which makes debugging and exception messages easier to understand - for instance, the default
        name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``.

        Returns ``self`` so that calls can be chained.
        """
        if not flag:
            self.debug = False
            return self

        # enabling debug installs the three default actions
        self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
        return self
2544

2545
    def __str__(self):
2546
        return self.name
2547

2548
    def __repr__(self):
        """Return the result of passing this expression to ``_ustr``."""
        text = _ustr(self)
        return text
2550

2551
    def streamline(self):
        """
        Mark this expression as streamlined and clear its cached ``strRepr``.

        Returns ``self`` so that calls can be chained.
        """
        self.strRepr = None
        self.streamlined = True
        return self
2555

2556
    def checkRecursion(self, parseElementList):
        """
        Recursion check hook; this implementation does nothing and returns ``None``.
        """
        return None
2558

2559
    def validate(self, validateTrace=None):
        """
        Check defined expressions for valid structure, check for infinite recursive definitions.

        This implementation starts the check by calling ``checkRecursion`` with an
        empty list; ``validateTrace`` is not used here.
        """
        seen = []
        self.checkRecursion(seen)
2564

2565
    def parseFile(self, file_or_filename, parseAll=False):
        """
        Execute the parse expression on the given file or filename.
        If a filename is specified (instead of a file object),
        the entire file is opened, read, and closed before parsing.

        The text is passed to ``parseString`` along with ``parseAll``, and that
        result is returned.  A :class:`ParseBaseException` is re-raised, with its
        traceback trimmed unless ``ParserElement.verbose_stacktrace`` is set.
        """
        try:
            text = file_or_filename.read()
        except AttributeError:
            # no read() method, so treat the argument as a filename
            with open(file_or_filename, "r") as handle:
                text = handle.read()

        try:
            return self.parseString(text, parseAll)
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise

            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            if getattr(exc, '__traceback__', None) is not None:
                exc.__traceback__ = self._trim_traceback(exc.__traceback__)
            raise exc
2586

2587
    def __eq__(self, other):
2588
        if self is other:
2589
            return True
2590
        elif isinstance(other, basestring):
2591
            return self.matches(other)
2592
        elif isinstance(other, ParserElement):
2593
            return vars(self) == vars(other)
2594
        return False
2595

2596
    def __ne__(self, other):
2597
        return not (self == other)
2598

2599
    def __hash__(self):
2600
        return id(self)
2601

2602
    def __req__(self, other):
2603
        return self == other
2604

2605
    def __rne__(self, other):
2606
        return not (self == other)
2607

2608
    def matches(self, testString, parseAll=True):
        """
        Method for quick testing of a parser against a test string. Good for simple
        inline microtests of sub expressions while building up larger parser.

        Parameters:
         - testString - to test against this expression for a match
         - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests

        Returns ``True`` if parsing succeeds, ``False`` if a
        :class:`ParseBaseException` is raised.

        Example::

            expr = Word(nums)
            assert expr.matches("100")
        """
        try:
            self.parseString(_ustr(testString), parseAll=parseAll)
        except ParseBaseException:
            return False
        else:
            return True
2627

2628
    def runTests(self, tests, parseAll=True, comment='#',
                 fullDump=True, printResults=True, failureTests=False, postParse=None,
                 file=None):
        """
        Execute the parse expression on a series of test strings, showing each
        test, the parsed results or where the parse failed. Quick and easy way to
        run a parse expression against a list of sample strings.

        Parameters:
         - tests - a list of separate test strings, or a multiline string of test strings
         - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
         - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
              string; pass None to disable comment filtering
         - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
              if False, only dump nested list
         - printResults - (default= ``True``) prints test output to stdout
         - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
         - postParse - (default= ``None``) optional callback for successful parse results; called as
              ``fn(test_string, parse_results)`` and returns a string to be added to the test output
         - file - (default=``None``) optional file-like object to which test output will be written;
              if None, will default to ``sys.stdout``

        Returns: a (success, results) tuple, where success indicates that all tests succeeded
        (or failed if ``failureTests`` is True), and the results contain a list of
        ``(test string, result)`` pairs, where result is the parse results or the
        exception raised for that test

        Example::

            number_expr = pyparsing_common.number.copy()

            result = number_expr.runTests('''
                # unsigned integer
                100
                # negative integer
                -100
                # float with scientific notation
                6.02e23
                # integer with scientific notation
                1e-12
                ''')
            print("Success" if result[0] else "Failed!")

            result = number_expr.runTests('''
                # stray character
                100Z
                # missing leading digit before '.'
                -.100
                # too many '.'
                3.14.159
                ''', failureTests=True)
            print("Success" if result[0] else "Failed!")

        prints::

            # unsigned integer
            100
            [100]

            # negative integer
            -100
            [-100]

            # float with scientific notation
            6.02e23
            [6.02e+23]

            # integer with scientific notation
            1e-12
            [1e-12]

            Success

            # stray character
            100Z
               ^
            FAIL: Expected end of text (at char 3), (line:1, col:4)

            # missing leading digit before '.'
            -.100
            ^
            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

            # too many '.'
            3.14.159
                ^
            FAIL: Expected end of text (at char 4), (line:1, col:5)

            Success

        Each test string must be on a single line. If you want to test a string that spans multiple
        lines, create a test like this::

            expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

        (Note that this is a raw string literal, you must include the leading 'r'.)
        """
        # a multiline string is split into individually stripped test lines
        if isinstance(tests, basestring):
            tests = list(map(str.strip, tests.rstrip().splitlines()))
        # a string comment marker is converted to a Literal so that matches() can be used on it
        if isinstance(comment, basestring):
            comment = Literal(comment)
        if file is None:
            file = sys.stdout
        print_ = file.write

        allResults = []
        # comment lines collected since the last test; they are emitted ahead of the next test
        comments = []
        success = True
        # expression that replaces a literal backslash-n pair with a real newline;
        # quotedString is registered as an ignore expression for it
        NL = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString)
        BOM = u'\ufeff'
        for t in tests:
            # a comment line, or a blank line while comments are pending, is
            # accumulated rather than run as a test
            if comment is not None and comment.matches(t, False) or comments and not t:
                comments.append(t)
                continue
            # remaining blank lines are skipped
            if not t:
                continue
            # output for this test begins with any pending comments, then the test string
            out = ['\n' + '\n'.join(comments) if comments else '', t]
            comments = []
            try:
                # convert newline marks to actual newlines, and strip leading BOM if present
                t = NL.transformString(t.lstrip(BOM))
                result = self.parseString(t, parseAll=parseAll)
            except ParseBaseException as pe:
                fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
                if '\n' in t:
                    # multi-line test: show the failing line, with a caret under the failing column
                    out.append(line(pe.loc, t))
                    out.append(' ' * (col(pe.loc, t) - 1) + '^' + fatal)
                else:
                    out.append(' ' * pe.loc + '^' + fatal)
                out.append("FAIL: " + str(pe))
                # a parse failure only counts as success when failures are expected
                success = success and failureTests
                result = pe
            except Exception as exc:
                # any other exception is reported and also recorded as this test's result
                out.append("FAIL-EXCEPTION: " + str(exc))
                success = success and failureTests
                result = exc
            else:
                # a successful parse only counts as success when failures are not expected
                success = success and not failureTests
                if postParse is not None:
                    try:
                        pp_value = postParse(t, result)
                        if pp_value is not None:
                            if isinstance(pp_value, ParseResults):
                                out.append(pp_value.dump())
                            else:
                                out.append(str(pp_value))
                        else:
                            # NOTE(review): this dump does not pass full=fullDump, unlike the
                            # other result dumps in this method - confirm whether intentional
                            out.append(result.dump())
                    except Exception as e:
                        # a failing postParse callback is reported, but the parse results are still shown
                        out.append(result.dump(full=fullDump))
                        out.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e))
                else:
                    out.append(result.dump(full=fullDump))

            if printResults:
                if fullDump:
                    out.append('')
                print_('\n'.join(out))

            allResults.append((t, result))

        return success, allResults
2789

2790

2791
class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr, must_skip=False):
        super(_PendingSkip, self).__init__()
        self.anchor = expr
        self.must_skip = must_skip
        # display form is the anchor followed by '...', built from the str of (expr + Empty())
        self.strRepr = str(expr + Empty()).replace('Empty', '...')
        self.name = self.strRepr

    def __add__(self, other):
        # the expression added after the placeholder becomes the SkipTo target
        skipper = SkipTo(other).setName("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # remove the skipped entry from the results when nothing was actually skipped
            if not t._skipped or t._skipped.asList() == ['']:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # when the last skipped text is empty, replace it with a 'missing <anchor>' note
            if t._skipped.asList()[-1:] == ['']:
                t.pop('_skipped')
                t['_skipped'] = 'missing <' + repr(self.anchor) + '>'

        return (self.anchor + skipper().addParseAction(must_skip)
                | skipper().addParseAction(show_skip)) + other

    def __repr__(self):
        return self.strRepr

    def parseImpl(self, *args):
        # a placeholder that was never completed with a following expression cannot parse
        raise Exception("use of `...` expression without following SkipTo target expression")
2822

2823

2824
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """
    def __init__(self):
        # tokens initialize the base class with savelist turned off
        super(Token, self).__init__(savelist=False)
2830

2831

2832
class Empty(Token):
    """A token that matches nothing at all, and therefore always matches.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2840

2841

2842
class NoMatch(Token):
    """A token that can never match; parsing it always raises
    :class:`ParseException`.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)
2854

2855

2856
class Literal(Token):
    """Token to exactly match a specified string.

    Example::

        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn, and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        quoted = '"%s"' % _ustr(self.match)
        self.name = quoted
        self.errmsg = "Expected " + quoted
        self.mayIndexError = False
        self.mayReturnEmpty = False

        # Performance tuning: modify __class__ to select
        # a parseImpl optimized for single-character check
        # (only for Literal itself, never for subclasses)
        if type(self) is Literal and self.matchLen == 1:
            self.__class__ = _SingleCharLiteral

    def parseImpl(self, instring, loc, doActions=True):
        # compare the first character before doing the full startswith comparison
        if instring[loc] != self.firstMatchChar or not instring.startswith(self.match, loc):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
2894

2895
class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl only has to compare a single character
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
2900

2901
_L = Literal
2902
ParserElement._literalStringClass = Literal
2903

2904
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is,
    it must not be immediately preceded or followed by a keyword
    (identifier) character.  Compare with :class:`Literal`:

     - ``Literal("if")`` will match the leading ``'if'`` in
       ``'ifAndOnlyIf'``.
     - ``Keyword("if")`` will not; it will only match the leading
       ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

     - ``identChars`` is a string of characters that would be valid
       identifier characters, defaulting to all alphanumerics + "_" and
       "$"
     - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """
    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text, so upper-case the
            # match string and the identifier characters once, up front
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        # In both branches the keyword matches only if the text at loc equals the
        # match string, the character after it (if any) is not an identifier
        # character, and the character before it (if any) is not one either.
        if self.caseless:
            if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch)
                    and (loc >= len(instring) - self.matchLen
                         or instring[loc + self.matchLen].upper() not in self.identChars)
                    and (loc == 0
                         or instring[loc - 1].upper() not in self.identChars)):
                return loc + self.matchLen, self.match

        else:
            if instring[loc] == self.firstMatchChar:
                if ((self.matchLen == 1 or instring.startswith(self.match, loc))
                        and (loc >= len(instring) - self.matchLen
                             or instring[loc + self.matchLen] not in self.identChars)
                        and (loc == 0 or instring[loc - 1] not in self.identChars)):
                    return loc + self.matchLen, self.match

        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy of this keyword that keeps the same identifier characters."""
        c = super(Keyword, self).copy()
        # Carry over this instance's own identifier characters (as an independent
        # set); resetting to DEFAULT_KEYWORD_CHARS would silently discard any
        # custom identChars given to the constructor.
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2981

2982
class CaselessLiteral(Literal):
    """Token that matches a given string without regard to letter case.

    Note: the token returned is always spelled exactly as the string
    given when the expression was defined, NOT as it appears in the
    input text.

    Example::

        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """
    def __init__(self, matchString):
        # The parent is initialised with the upper-cased text; parseImpl
        # compares upper-cased input against self.match.
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Keep the defining spelling so it can be returned as the token.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(end, self.returnString)`` if the upper-cased input at
        ``loc`` equals ``self.match``; raise ``ParseException`` otherwise.
        """
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
3004

3005
class CaselessKeyword(Keyword):
    """
    :class:`Keyword` that ignores letter case when matching.

    Example::

        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """
    def __init__(self, matchString, identChars=None):
        # Same construction as Keyword, with the caseless flag forced on.
        parent = super(CaselessKeyword, self)
        parent.__init__(matchString, identChars, caseless=True)
3017

3018
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

     - ``match_string`` - string to be matched
     - ``maxMismatches`` - (``default=1``) maximum number of
       mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

     - ``mismatches`` - a list of the positions within the
       match_string where mismatches were found
     - ``original`` - the original match_string used to compare
       against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the input at ``loc`` with ``match_string`` character by
        character.

        Returns ``(end_loc, results)`` where ``results`` holds the matched
        input text plus the named results ``original`` and ``mismatches``.
        Raises ``ParseException`` at ``loc`` when the remaining input is
        shorter than ``match_string`` or more than ``maxMismatches``
        positions differ.
        """
        start = loc
        instrlen = len(instring)
        maxloc = start + len(self.match_string)

        if maxloc <= instrlen:
            match_string = self.match_string
            match_stringloc = 0
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # match_stringloc is an offset into match_string, so the end
                # location in the input must be taken relative to ``start``;
                # otherwise matches not beginning at offset 0 report the
                # wrong location and text.
                loc = start + match_stringloc + 1
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
3085

3086

3087
class Word(Token):
    """Token that matches a word built from sets of allowed characters.

    A ``Word`` is defined with a string holding every character allowed
    in the first position, optionally a second string holding the
    characters allowed in the rest of the word (when omitted, the
    initial character set is used for the body too), and optional
    minimum, maximum, and/or exact lengths.  ``min`` defaults to 1 (a
    minimum value < 1 is not valid); ``max`` and ``exact`` default to 0,
    meaning no maximum or exact length restriction.  The optional
    ``excludeChars`` parameter lists characters to drop from the given
    character strings; this is useful to define a word of all printables
    except for one or two characters, for instance.

    :class:`srange` is useful for defining custom character set strings
    for defining ``Word`` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

     - :class:`alphas`
     - :class:`nums`
     - :class:`alphanums`
     - :class:`hexnums`
     - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
       - accented, tilded, umlauted, etc.)
     - :class:`punc8bit` (non-alphabetic characters in ASCII range
       128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - :class:`printables` (any non-whitespace character)

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()

        # drop excluded characters from both character strings
        if excludeChars:
            banned = set(excludeChars)
            initChars = ''.join(ch for ch in initChars if ch not in banned)
            if bodyChars:
                bodyChars = ''.join(ch for ch in bodyChars if ch not in banned)

        # with no (or empty) body set, the body reuses the initial set
        if not bodyChars:
            bodyChars = initChars

        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        # an exact length overrides both min and max
        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default lengths and no space among the characters, try to
        # do the matching with a compiled regular expression instead.
        default_lengths = (min == 1 and max == 0 and exact == 0)
        if default_lengths and ' ' not in self.initCharsOrig + self.bodyCharsOrig:
            first, rest = self.initCharsOrig, self.bodyCharsOrig
            if rest == first:
                regex_text = "[%s]+" % _escapeRegexRangeChars(first)
            elif len(first) == 1:
                regex_text = "%s[%s]*" % (re.escape(first),
                                          _escapeRegexRangeChars(rest),)
            else:
                regex_text = "[%s][%s]*" % (_escapeRegexRangeChars(first),
                                            _escapeRegexRangeChars(rest),)
            if self.asKeyword:
                regex_text = r"\b" + regex_text + r"\b"
            self.reString = regex_text

            try:
                self.re = re.compile(self.reString)
            except Exception:
                # fall back to the character-by-character parseImpl below
                self.re = None
            else:
                self.re_match = self.re.match
                # switch this instance over to the regex-backed parseImpl
                self.__class__ = _WordRegex

    def parseImpl(self, instring, loc, doActions=True):
        """Scan a word starting at ``loc`` one character at a time.

        Returns ``(end_loc, matched_text)``.  Raises ``ParseException``
        when the first character is not an initial character, the word is
        shorter than ``minLen``, a body character follows a word that
        stopped at a specified maximum length, or (with ``asKeyword``) the
        word is directly adjacent to another body character.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        total = len(instring)
        body = self.bodyChars
        limit = min(start + self.maxLen, total)

        loc += 1
        while loc < limit and instring[loc] in body:
            loc += 1

        def body_char_at(pos):
            # True when ``pos`` lies inside the input and holds a body character
            return 0 <= pos < total and instring[pos] in body

        rejected = (
            loc - start < self.minLen
            or (self.maxSpecified and body_char_at(loc))
            or (self.asKeyword and (body_char_at(start - 1) or body_char_at(loc)))
        )
        if rejected:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(Word, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def abbreviate(chars):
                # show at most the first four characters of a set
                return chars[:4] + "..." if len(chars) > 4 else chars

            shown = [abbreviate(self.initCharsOrig)]
            if self.initCharsOrig != self.bodyCharsOrig:
                shown.append(abbreviate(self.bodyCharsOrig))
            self.strRepr = "W:(%s)" % ", ".join(shown)

        return self.strRepr
3245

3246
class _WordRegex(Word):
    """:class:`Word` variant that matches through ``self.re_match``."""
    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(end, matched_text)`` for a regex match at ``loc``;
        raise ``ParseException`` when the regex does not match there.
        """
        found = self.re_match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)
3254

3255

3256
class Char(_WordRegex):
    """Shortcut for ``Word(characters, exact=1)``: matches exactly one
    character taken from the given string of characters.
    """
    def __init__(self, charset, asKeyword=False, excludeChars=None):
        super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars)
        # Word.__init__ builds no regex when exact=1, so build the
        # single-character class here.
        char_class = "[%s]" % _escapeRegexRangeChars(''.join(self.initChars))
        self.reString = (r"\b%s\b" % char_class) if asKeyword else char_class
        self.re = re.compile(self.reString)
        self.re_match = self.re.match
3268

3269

3270
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python  `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named parse results.

    If instead of the Python stdlib re module you wish to use a different RE module
    (such as the `regex` module), you can do so by building your
    Regex object with a compiled RE that was compiled using regex.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # use regex module instead of stdlib re module to construct a Regex using
        # a compiled regular expression
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))

    """
    def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may also be an already compiled expression object (any
        object with ``pattern`` and ``match`` attributes), which is then
        used as given.  ``asGroupList`` makes a match return the regex's
        groups; ``asMatch`` makes it return the match object itself.

        Raises the ``re.error`` from compilation for an invalid pattern
        string (after issuing a ``SyntaxWarning``), and ``TypeError`` when
        ``pattern`` is neither a string nor a compiled expression.
        """
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same exception class that sre_constants
                # exposes, without relying on that deprecated module.
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise

        elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'):
            # already compiled: used as-is, ``flags`` is only recorded
            self.re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError("Regex may only be constructed with a string or a compiled RE object")

        self.re_match = self.re.match

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = self.re_match("") is not None
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # pick the parse implementation per instance; asMatch wins when
        # both options are given
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(end, results)`` where ``results`` holds the matched
        text plus one named result per named group of the regex.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Return ``(end, groups)`` where ``groups`` is the tuple returned
        by the match object's ``groups()``.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Return ``(end, match)`` where ``match`` is the match object itself."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def __str__(self):
        try:
            return super(Regex, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

    def sub(self, repl):
        r"""
        Return Regex with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``SyntaxError`` (after a ``SyntaxWarning``) when this Regex
        was built with ``asGroupList=True``, or with ``asMatch=True`` while
        ``repl`` is a callable.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transformString("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            message = "cannot use sub() with Regex(asGroupList=True)"
            warnings.warn(message, SyntaxWarning, stacklevel=2)
            raise SyntaxError(message)

        if self.asMatch and callable(repl):
            message = "cannot use sub() with a callable with Regex(asMatch=True)"
            warnings.warn(message, SyntaxWarning, stacklevel=2)
            raise SyntaxError(message)

        if self.asMatch:
            # the token is a match object, which can expand the template itself
            def pa(tokens):
                return tokens[0].expand(repl)
        else:
            def pa(tokens):
                return self.re.sub(repl, tokens[0])
        return self.addParseAction(pa)
3409

3410
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

        - quoteChar - string of one or more characters defining the
          quote delimiting string
        - escChar - character to escape quotes, typically backslash
          (default= ``None``)
        - escQuote - special quote sequence to escape an embedded quote
          string (such as SQL's ``""`` to escape an embedded ``"``)
          (default= ``None``)
        - multiline - boolean indicating whether quotes can span
          multiple lines (default= ``False``)
        - unquoteResults - boolean indicating whether the matched text
          should be unquoted (default= ``True``)
        - endQuoteChar - string of one or more characters defining the
          end of the quote delimited string (default= ``None``  => same as
          quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace
          (``'\t'``, ``'\n'``, etc.) to actual whitespace
          (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False,
                 unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """Build the regular expression that recognizes the quoted string.

        Raises ``SyntaxError`` (after a ``SyntaxWarning``) when ``quoteChar``
        or ``endQuoteChar`` is empty once surrounding whitespace is
        stripped, and re-raises the ``re.error`` if the generated pattern
        does not compile.
        """
        super(QuotedString, self).__init__()

        # remove white space from quote chars - won't work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # Characters that can never appear as plain body text: the first
        # end-quote character, the escape character and, unless multiline
        # is set, line breaks.
        escaped_esc = _escapeRegexRangeChars(escChar) if escChar is not None else ''
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            line_breaks = ''
        else:
            self.flags = 0
            line_breaks = r'\n\r'
        self.pattern = r'%s(?:[^%s%s%s]' % (re.escape(self.quoteChar),
                                            _escapeRegexRangeChars(self.endQuoteChar[0]),
                                            line_breaks,
                                            escaped_esc)

        if len(self.endQuoteChar) > 1:
            # a proper prefix of a multi-character end quote, followed by
            # something other than its next character, is ordinary content
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                                   _escapeRegexRangeChars(self.endQuoteChar[i]))
                                      for i in range(len(self.endQuoteChar) - 1, 0, -1)) + ')')

        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            # re.error is the same exception class that sre_constants
            # exposes, without relying on that deprecated module.
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``.

        Returns ``(end, text)``.  With ``unquoteResults`` the delimiters
        are stripped, escapes are undone and ``escQuote`` sequences are
        replaced by the end quote; otherwise ``text`` is the raw match.
        Raises ``ParseException`` when no quoted string starts at ``loc``.
        """
        result = None
        # cheap first-character test before running the regex
        if instring[loc] == self.firstQuoteChar:
            result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen: -self.endQuoteCharLen]

            if isinstance(ret, basestring):
                # replace escaped whitespace and escaped characters
                ret = self._unescape(ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def _unescape(self, text):
        """Undo whitespace escapes and escape-character sequences in one scan.

        Both kinds of escape are handled in a single left-to-right pass so
        that a character consumed as part of one escape is never
        reinterpreted as the start of another.  Doing the whitespace
        conversion as a separate first pass turned an escaped escape
        character followed by ``t``/``n``/``f``/``r`` (for example ``\\\\n``
        with a backslash ``escChar``) into whitespace.
        """
        alternatives = []
        if self.convertWhitespaceEscapes and '\\' in text:
            alternatives.append(r'(?P<ws>\\[tnfr])')
        if self.escChar:
            # no DOTALL: as before, an escape character directly before a
            # newline is left untouched
            alternatives.append(re.escape(self.escChar) + r'(?P<esc>.)')
        if not alternatives:
            return text

        whitespace = {'t': '\t', 'n': '\n', 'f': '\f', 'r': '\r'}

        def replace(found):
            if found.lastgroup == 'ws':
                return whitespace[found.group()[1]]
            return found.group('esc')

        return re.sub('|'.join(alternatives), replace, text)

    def __str__(self):
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
3559

3560

3561
class CharsNotIn(Token):
    """Token that matches a run of characters *not* contained in a given
    set.  Whitespace is part of the match unless it is listed in the
    exclusion set (see example).  Defined with a string holding all
    disallowed characters and optional minimum, maximum, and/or exact
    lengths.  ``min`` defaults to 1 (a minimum value < 1 is not valid);
    ``max`` and ``exact`` default to 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        # leading whitespace may itself be part of the match
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use "
                             "Optional(CharsNotIn()) if zero-length char group is permitted")

        # an exact length overrides both min and max
        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(end, text)`` for the run of allowed characters at
        ``loc``; raise ``ParseException`` when the first character is
        disallowed or the run is shorter than ``minLen``.
        """
        banned = self.notChars
        if instring[loc] in banned:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] not in banned:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            # show at most the first four disallowed characters
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
3635

3636
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """
    # display names used to build the expression's name
    whiteStrs = {
        ' ' : '<SP>',
        '\t': '<TAB>',
        '\n': '<LF>',
        '\r': '<CR>',
        '\f': '<FF>',
        u'\u00A0': '<NBSP>',
        u'\u1680': '<OGHAM_SPACE_MARK>',
        u'\u180E': '<MONGOLIAN_VOWEL_SEPARATOR>',
        u'\u2000': '<EN_QUAD>',
        u'\u2001': '<EM_QUAD>',
        u'\u2002': '<EN_SPACE>',
        u'\u2003': '<EM_SPACE>',
        u'\u2004': '<THREE-PER-EM_SPACE>',
        u'\u2005': '<FOUR-PER-EM_SPACE>',
        u'\u2006': '<SIX-PER-EM_SPACE>',
        u'\u2007': '<FIGURE_SPACE>',
        u'\u2008': '<PUNCTUATION_SPACE>',
        u'\u2009': '<THIN_SPACE>',
        u'\u200A': '<HAIR_SPACE>',
        u'\u200B': '<ZERO_WIDTH_SPACE>',
        u'\u202F': '<NNBSP>',
        u'\u205F': '<MMSP>',
        u'\u3000': '<IDEOGRAPHIC_SPACE>',
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as leading whitespace
        self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite))
        # ~ self.leaveWhitespace()
        # Characters without an entry in whiteStrs (for example "\v") are
        # shown by their repr instead of raising KeyError.
        self.name = ("".join(White.whiteStrs.get(c, repr(c)) for c in self.matchWhite))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(end, text)`` for the run of ``matchWhite`` characters
        at ``loc``; raise ``ParseException`` when the first character is
        not one of them or the run is shorter than ``minLen``.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
3704

3705

3706
class _PositionToken(Token):
    """Common base for the position-testing tokens defined below it."""
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        # named after the concrete class
        self.name = type(self).__name__
3712

3713
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; useful
    for scraping tabular reports.
    """
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target
        column (or a non-space character or the end of input) is reached.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column; raise
        ``ParseException`` when ``loc`` is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target_loc = loc + self.col - current
        return target_loc, instring[loc: target_loc]
3737

3738

3739
class LineStart(_PositionToken):
    r"""Matches only when the current position is at the beginning of a
    line within the parse string (i.e. at column 1).

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__(self):
        super(LineStart, self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3769

3770
class LineEnd(_PositionToken):
    """Matches when the current position is at the end of a line within
    the parse string.
    """
    def __init__(self):
        super(LineEnd, self).__init__()
        # newline must not be skipped as whitespace, since it is what we match
        non_newline_white = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(non_newline_white)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc`` (returned as a token) or the end of
        the input (returned as an empty token list); otherwise raise
        ``ParseException``.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3789

3790
class StringStart(_PositionToken):
    """Matches when the current position is at the beginning of the
    parse string.
    """
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed at location 0, or at a later location if everything
        before it is skipped by ``preParse`` (whitespace and ignorables).
        """
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3804

3805
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string
    """
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list when ``loc`` is at or beyond the
        end of ``instring``; raise ``ParseException`` otherwise.

        When ``loc`` is exactly at the end, the returned location is
        ``loc + 1``; when it is already past the end it is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end: the three comparisons are exhaustive, so no further
        # fallback branch is needed
        return loc, []
3821

3822
class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word,
    and is not preceded by any character in a given set of
    ``wordChars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """
    def __init__(self, wordChars=printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (empty token list) at location 0, or when the previous
        character is not a word character and the current one is.
        """
        if loc != 0:
            word_chars = self.wordChars
            preceded_by_word_char = instring[loc - 1] in word_chars
            if preceded_by_word_char or instring[loc] not in word_chars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3842

3843
class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and is
    not followed by any character in a given set of ``wordChars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """
    def __init__(self, wordChars=printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (empty token list) when ``loc`` is at or past the end of
        the input, or when the previous character is a word character and
        the current one is not.  Raise ``ParseException`` otherwise.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            # At loc == 0 there is no preceding character, so this cannot be
            # the end of a word.  Without the explicit check, instring[loc - 1]
            # would wrap around to the LAST character of the input and the
            # result would depend on how the string happens to end.
            if (instring[loc] in self.wordChars
                    or loc == 0
                    or instring[loc - 1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3864

3865

3866
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement that holds a list of contained
    expressions (``self.exprs``) to be combined by concrete subclasses.
    """
    def __init__(self, exprs, savelist=False):
        """Normalize ``exprs`` into the list ``self.exprs``.

        Accepts a single string, a single ParserElement, a generator, any
        iterable, or a lone non-iterable object; strings are wrapped with
        ``self._literalStringClass``.
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, basestring):
            members = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            members = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any plain strings in the sequence with the literal class
            members = [self._literalStringClass(item) if isinstance(item, basestring) else item
                       for item in exprs]
        else:
            try:
                members = list(exprs)
            except TypeError:
                members = [exprs]
        self.exprs = members
        self.callPreparse = False

    def append(self, other):
        """Add ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # cached string representation is now stale
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on
           all contained expressions (which are copied first)."""
        self.skipWhitespace = False
        clones = [member.copy() for member in self.exprs]
        self.exprs = clones
        for clone in clones:
            clone.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register ``other`` as ignorable on this element and propagate the
        registered ignore expression to every contained expression.

        A ``Suppress`` that is already registered is not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseExpression, self).ignore(other)
        for member in self.exprs:
            member.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            # base-class rendering failed; fall back to a generated
            # representation built from the contained expressions
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline contained expressions and flatten directly nested
        expressions of the same class, then refresh ``errmsg``.
        """
        super(ParseExpression, self).streamline()

        for member in self.exprs:
            member.streamline()

        # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d)
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            def absorbable(candidate):
                return (isinstance(candidate, self.__class__)
                        and not candidate.parseAction
                        and candidate.resultsName is None
                        and not candidate.debug)

            head = self.exprs[0]
            if absorbable(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            tail = self.exprs[-1]
            if absorbable(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def validate(self, validateTrace=None):
        """Validate every contained expression, then check this element
        for recursion.
        """
        trace = ([] if validateTrace is None else validateTrace[:]) + [self]
        for member in self.exprs:
            member.validate(trace)
        self.checkRecursion([])

    def copy(self):
        """Return a copy of this element whose contained expressions are
        copies as well.
        """
        duplicate = super(ParseExpression, self).copy()
        duplicate.exprs = [member.copy() for member in self.exprs]
        return duplicate

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, warning (when the corresponding diagnostic
        is enabled) about contained expressions that already carry a
        results name of their own.
        """
        if __diag__.warn_ungrouped_named_tokens_in_collection:
            template = ("{0}: setting results name {1!r} on {2} expression "
                        "collides with {3!r} on contained expression")
            for member in self.exprs:
                if isinstance(member, ParserElement) and member.resultsName:
                    warnings.warn(template.format("warn_ungrouped_named_tokens_in_collection",
                                                  name,
                                                  type(self).__name__,
                                                  member.resultsName),
                                  stacklevel=3)

        return super(ParseExpression, self)._setResultsName(name, listAllMatches)
3985

3986

3987
class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element (named ``'-'``).  In ``And.parseImpl`` every
        expression after this marker has its parse failures re-raised as
        ``ParseSyntaxException``.
        """
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist=True):
        """Build the sequence from ``exprs``.

        An ``Ellipsis`` item is replaced by a ``SkipTo`` targeting the item
        that follows it (results name ``"_skipped*"``); a trailing
        ``Ellipsis`` raises ``Exception``.  An empty ``exprs`` is accepted so
        that a sequence can be built up incrementally with ``+=``.
        """
        exprs = list(exprs)
        if exprs and Ellipsis in exprs:
            tmp = []
            for i, expr in enumerate(exprs):
                if expr is Ellipsis:
                    if i < len(exprs) - 1:
                        # (Empty() + x) converts a plain string x into an element
                        skipto_arg = (Empty() + exprs[i + 1]).exprs[-1]
                        tmp.append(SkipTo(skipto_arg)("_skipped*"))
                    else:
                        raise Exception("cannot construct And with sequence ending in ...")
                else:
                    tmp.append(expr)
            exprs[:] = tmp
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the first expression; with no
        # expressions there is nothing to inherit from, so keep the defaults
        # instead of failing with IndexError on self.exprs[0]
        if self.exprs:
            self.setWhitespaceChars(self.exprs[0].whiteChars)
            self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def streamline(self):
        """Merge each pending skip with the expression that follows it, then
        apply the base-class streamlining and recompute ``mayReturnEmpty``.
        """
        # collapse any _PendingSkip's
        if self.exprs:
            if any(isinstance(e, ParseExpression) and e.exprs and isinstance(e.exprs[-1], _PendingSkip)
                   for e in self.exprs[:-1]):
                for i, e in enumerate(self.exprs[:-1]):
                    if e is None:
                        continue
                    if (isinstance(e, ParseExpression)
                            and e.exprs and isinstance(e.exprs[-1], _PendingSkip)):
                        e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                        # mark the absorbed expression for removal below
                        self.exprs[i + 1] = None
                self.exprs = [e for e in self.exprs if e is not None]

        super(And, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order and return the final
        location with the accumulated results.

        After an ``_ErrorStop`` marker, ``ParseBaseException`` and
        ``IndexError`` failures are raised as ``ParseSyntaxException``.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            # keep results that carry tokens or named values
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append ``other`` (strings are wrapped as literals) in place."""
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # And([self, other])

    def checkRecursion(self, parseElementList):
        """Check contained expressions for recursion, stopping after the
        first one that cannot match empty.
        """
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
4094

4095

4096
class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is flagged as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def streamline(self):
        super(Or, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and return the result of the longest match.

        Alternatives are first probed with ``tryParse``; the successful ones
        are then re-parsed from longest to shortest.  If nothing matches, the
        failure that got furthest into the input is raised with this
        element's ``errmsg``.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        for alternative in self.exprs:
            try:
                end = alternative.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end, alternative))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                return candidates[0][1]._parse(instring, loc, doActions)

            longest = -1, None
            for expected_end, alternative in candidates:
                if expected_end <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_end, toks = alternative._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_end >= expected_end:
                        return actual_end, toks
                    # didn't match as much as before
                    if actual_end > longest[0]:
                        longest = actual_end, toks

            if longest != (-1, None):
                return longest

        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        """Append ``other`` (strings are wrapped as literals) in place."""
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # Or([self, other])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        nested = parseElementList[:] + [self]
        for alternative in self.exprs:
            alternative.checkRecursion(nested)

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, warning (when the diagnostic is enabled and
        the compatibility switch is off) if any alternative is an ``And``.
        """
        if (not __compat__.collect_all_And_tokens
                and __diag__.warn_multiple_tokens_in_named_alternation
                and any(isinstance(e, And) for e in self.exprs)):
            message = ("{0}: setting results name {1!r} on {2} expression "
                       "may only return a single token for an And alternative, "
                       "in future will return the full list of tokens")
            warnings.warn(message.format("warn_multiple_tokens_in_named_alternation",
                                         name,
                                         type(self).__name__),
                          stacklevel=3)

        return super(Or, self)._setResultsName(name, listAllMatches)
4217

4218

4219
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is flagged as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def streamline(self):
        super(MatchFirst, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        If none does, raise the failure that got furthest into the input,
        with its message replaced by this element's ``errmsg``.
        """
        furthest_loc = -1
        furthest_exc = None
        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthest_loc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        """Append ``other`` (strings are wrapped as literals) in place."""
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # MatchFirst([self, other])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        nested = parseElementList[:] + [self]
        for alternative in self.exprs:
            alternative.checkRecursion(nested)

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, warning (when the diagnostic is enabled and
        the compatibility switch is off) if any alternative is an ``And``.
        """
        if (not __compat__.collect_all_And_tokens
                and __diag__.warn_multiple_tokens_in_named_alternation
                and any(isinstance(e, And) for e in self.exprs)):
            message = ("{0}: setting results name {1!r} on {2} expression "
                       "may only return a single token for an And alternative, "
                       "in future will return the full list of tokens")
            warnings.warn(message.format("warn_multiple_tokens_in_named_alternation",
                                         name,
                                         type(self).__name__),
                          stacklevel=3)

        return super(MatchFirst, self)._setResultsName(name, listAllMatches)
4303

4304

4305
class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # expression groups are computed lazily on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def streamline(self):
        super(Each, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expressions in whatever order they occur.

        A first pass uses ``tryParse`` to discover the order in which the
        expressions match; a second pass parses them for real in that order.
        Raises ``ParseException`` if any required expression never matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            members = self.exprs
            self.opt1map = dict((id(e.expr), e) for e in members if isinstance(e, Optional))
            unwrapped_optionals = [e.expr for e in members if isinstance(e, Optional)]
            implicit_optionals = [e for e in members
                                  if e.mayReturnEmpty and not isinstance(e, (Optional, Regex))]
            self.optionals = unwrapped_optionals + implicit_optionals
            self.multioptionals = [e.expr for e in members if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in members if isinstance(e, OneOrMore)]
            self.required = [e for e in members
                             if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))] + self.multirequired
            self.initExprGroups = False

        probe_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        matchOrder = []

        # keep sweeping over the remaining expressions until a full sweep
        # produces no match at all
        while True:
            sweep = pending_required + pending_optional + self.multioptionals + self.multirequired
            failures = 0
            for e in sweep:
                try:
                    probe_loc = e.tryParse(instring, probe_loc)
                except ParseException:
                    failures += 1
                else:
                    # record the original Optional wrapper, if there is one
                    matchOrder.append(self.opt1map.get(id(e), e))
                    if e in pending_required:
                        pending_required.remove(e)
                    elif e in pending_optional:
                        pending_optional.remove(e)
            if failures == len(sweep):
                break

        if pending_required:
            missing = ", ".join(_ustr(e) for e in pending_required)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in pending_optional]

        collected = []
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            collected.append(results)

        return loc, sum(collected, ParseResults([]))

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        nested = parseElementList[:] + [self]
        for member in self.exprs:
            member.checkRecursion(nested)
4436

4437

4438
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.  Wraps a single contained expression,
    ``self.expr``, which may be ``None``.
    """
    def __init__(self, expr, savelist=False):
        """Wrap ``expr`` (a string is converted to a literal element) and
        inherit its whitespace, empty-match and ignore settings.
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            if issubclass(self._literalStringClass, Token):
                expr = self._literalStringClass(expr)
            else:
                # the literal class is itself a wrapper: give it a Literal to wrap
                expr = self._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate parsing to the contained expression.

        Raises ``ParseException`` if no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            # report against the actual input so the exception carries usable
            # context, as every other ParseException raised here does
            raise ParseException(instring, loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element and on a copy of the
        contained expression (if one is set).
        """
        self.skipWhitespace = False
        # check for None BEFORE touching the expression: copying first would
        # raise AttributeError when no expression is set
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register ``other`` as ignorable here and on the contained
        expression.  A ``Suppress`` that is already registered is not added
        again.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super(ParseElementEnhance, self).ignore(other)
                if self.expr is not None:
                    self.expr.ignore(self.ignoreExprs[-1])
        else:
            super(ParseElementEnhance, self).ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element already
        appears in ``parseElementList``; otherwise recurse into the
        contained expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None):
        """Validate the contained expression, then check this element for
        recursion.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # base-class rendering failed; fall back to a generated
            # representation built from the contained expression
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
4515

4516

4517
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the lookahead expression at ``loc`` and return the original
        ``loc`` together with the emptied results object."""
        # Empty the token list in place instead of returning a new list, so
        # that named results defined in the lookahead expression are kept.
        _end, lookahead = self.expr._parse(instring, loc, doActions=doActions)
        del lookahead[:]

        return loc, lookahead
4550

4551

4552
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

     - expr - expression that must match prior to the current parse
       location
     - retreat - (default= ``None``) - (int) maximum number of characters
       to lookbehind prior to the current parse location

    If the lookbehind expression is a string, Literal, Keyword, or
    a Word or CharsNotIn with a specified exact or maximum length, then
    the retreat parameter is not required. Otherwise, retreat must be
    specified to give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """
    def __init__(self, expr, retreat=None):
        super(PrecededBy, self).__init__(expr)
        # NOTE(review): calling the expression with no arguments presumably
        # yields a copy, so leaveWhitespace() does not alter the caller's
        # expression - confirm against ParserElement.__call__
        self.expr = self.expr().leaveWhitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.exact = False
        # Derive a fixed lookbehind distance from the ORIGINAL argument where
        # its type determines one.  basestring is the string check used by
        # the other classes in this module; testing against plain ``str``
        # would leave other string types with retreat=None and exact=False.
        if isinstance(expr, basestring):
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, _PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        # _ustr is the string conversion used for messages elsewhere in this module
        self.errmsg = "not preceded by " + _ustr(expr)
        self.skipWhitespace = False
        # parse action that empties the token list in place; results names
        # stay available on the results object
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Check that the lookbehind expression matches just before ``loc``.

        Returns ``(loc, results)`` with ``loc`` unchanged.  Raises
        ParseException (or the last parse exception seen while scanning the
        lookbehind window) when no match ends at ``loc``.
        """
        if self.exact:
            # fixed distance: a single attempt at exactly loc - retreat
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            # retreat specified a maximum lookbehind window, iterate
            # StringEnd() forces a match to end at the end of the slice,
            # which is the current parse location
            test_expr = self.expr + StringEnd()
            instring_slice = instring[max(0, loc - self.retreat):loc]
            last_expr = ParseException(instring, loc, self.errmsg)
            for offset in range(1, min(loc, self.retreat + 1)+1):
                try:
                    _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset)
                except ParseBaseException as pbe:
                    last_expr = pbe
                else:
                    break
            else:
                # no offset produced a match
                raise last_expr
        return loc, ret
4625

4626

4627
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the '~' operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Optional(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infixNotation
        boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace() here, don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Fail if the contained expression can parse at ``loc``; otherwise
        return ``loc`` unchanged with an empty token list."""
        unwanted_found = self.expr.canParseNext(instring, loc)
        if unwanted_found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__(self):
        # an explicitly assigned name wins over the generated representation
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "~{" + inner + "}"

        return self.strRepr
4670

4671
class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repeated matching with an optional
    ``stopOn`` sentinel expression that ends the repetition."""

    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        if isinstance(stopOn, basestring):
            sentinel = self._literalStringClass(stopOn)
        else:
            sentinel = stopOn
        self.stopOn(sentinel)

    def stopOn(self, ender):
        """Set (or clear, with ``None``) the sentinel expression; a string is
        converted with the configured literal string class.  Returns self."""
        if isinstance(ender, basestring):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            # stored negated: parsing it fails when the sentinel is present
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expression once (required), then keep matching
        until it fails or the sentinel is found.  Returns the location after
        the last successful match and the accumulated tokens."""
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        use_sentinel = self.not_ender is not None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if use_sentinel:
            reject_sentinel = self.not_ender.tryParse
            reject_sentinel(instring, loc)
        loc, tokens = parse_item(instring, loc, doActions, callPreParse=False)

        try:
            have_ignorables = bool(self.ignoreExprs)
            while True:
                if use_sentinel:
                    reject_sentinel(instring, loc)
                start = skip_ignorables(instring, loc) if have_ignorables else loc
                # loc is only rebound when the parse succeeds, so a failure
                # leaves it at the end of the last good match
                loc, more = parse_item(instring, start, doActions)
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failed repetition ends the loop
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, first warning (when the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled)
        about results names already set on the contained expression or on the
        entries of its ``exprs`` attribute, if it has one."""
        if __diag__.warn_ungrouped_named_tokens_in_collection:
            candidates = [self.expr] + getattr(self.expr, 'exprs', [])
            for e in candidates:
                if not (isinstance(e, ParserElement) and e.resultsName):
                    continue
                warnings.warn("{0}: setting results name {1!r} on {2} expression "
                              "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection",
                                                                                   name,
                                                                                   type(self).__name__,
                                                                                   e.resultsName),
                              stacklevel=3)

        return super(_MultipleMatch, self)._setResultsName(name, listAllMatches)
4727

4728

4729
class OneOrMore(_MultipleMatch):
    """Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        # an explicitly assigned name wins over the generated representation
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "{" + inner + "}..."

        return self.strRepr
4763

4764
class ZeroOrMore(_MultipleMatch):
    """Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to :class:`OneOrMore`
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the one-or-more implementation; if even the first
        match fails, succeed at ``loc`` with an empty token list."""
        try:
            result = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            result = loc, []
        return result

    def __str__(self):
        # an explicitly assigned name wins over the generated representation
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "[" + inner + "]..."

        return self.strRepr
4793

4794

4795
class _NullToken(object):
4796
    def __bool__(self):
4797
        return False
4798
    __nonzero__ = __bool__
4799
    def __str__(self):
4800
        return ""
4801

4802
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that must match zero or one time
     - default (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    # sentinel default meaning "no default value was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(self, expr, default=__optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the contained expression; if it fails, succeed at ``loc`` with
        the default value (or with no tokens when no default was supplied)."""
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is self.__optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the contained expression's results name
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        # an explicitly assigned name wins over the generated representation
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "[" + inner + "]"

        return self.strRepr
4869

4870
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default= ``False``) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default= ``None``) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default= ``None``) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        # a plain string failOn is wrapped with the configured literal string class
        self.failOn = self._literalStringClass(failOn) if isinstance(failOn, basestring) else failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` for the first position where the target
        expression parses and return the skipped text (plus the target's
        tokens when ``include`` was requested)."""
        begin = loc
        text_len = len(instring)
        parse_target = self.expr._parse
        fail_here = self.failOn.canParseNext if self.failOn is not None else None
        skip_ignored = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        scan = loc
        while scan <= text_len:
            # NOTE(review): a failOn match leaves the loop with ``break``, which
            # bypasses the while/else failure below - confirm this is intended
            if fail_here is not None and fail_here(instring, scan):
                break

            if skip_ignored is not None:
                # advance past ignore expressions
                while True:
                    try:
                        scan = skip_ignored(instring, scan)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are not run during the scan
                parse_target(instring, scan, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                scan += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = scan
        skipresult = ParseResults(instring[begin:loc])

        if self.includeMatch:
            loc, matched = parse_target(instring, loc, doActions, callPreParse=False)
            skipresult += matched

        return loc, skipresult
4987

4988
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the '<<' operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, '|' has a lower precedence than '<<', so that::

        fwdExpr << a | b | c

    will actually be evaluated as::

        (fwdExpr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwdExpr << (a | b | c)

    Converting to use the '<<=' operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Assign ``other`` as the contained expression and adopt its parsing
        attributes (whitespace handling, list-saving flag, ignorable
        expressions).  A string is converted with the configured literal
        string class.  Returns self."""
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        self.expr = other
        # drop any cached string form built for a previous expression
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of ``<<``; avoids the operator-precedence pitfall
        described in the class docstring."""
        return self << other

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only; the contained
        expression is left untouched.  Returns self."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the contained expression once; the ``streamlined`` flag
        is set before descending so a recursive grammar cannot loop back
        into this element.  Returns self."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        """Validate the contained expression unless this element is already
        on the validation trace, then run a recursion check."""
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name
        if self.strRepr is not None:
            return self.strRepr

        # Avoid infinite recursion by setting a temporary strRepr
        self.strRepr = ": ..."

        # Use the string representation of main expression.
        retString = '...'
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)[:1000]
            else:
                retString = "None"
        finally:
            # runs even if converting the expression raised, so the temporary
            # placeholder never stays cached
            self.strRepr = self.__class__.__name__ + ": " + retString
        return self.strRepr

    def copy(self):
        """Return a copy; an empty ``Forward`` is copied as a new ``Forward``
        that wraps this one, so a later assignment to this element is seen
        through the copy."""
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, first warning (when the
        ``warn_name_set_on_empty_Forward`` diagnostic is enabled) if no
        expression has been assigned yet."""
        if __diag__.warn_name_set_on_empty_Forward:
            if self.expr is None:
                # placeholders: 0 = diagnostic name, 1 = results name, 2 = class name
                warnings.warn("{0}: setting results name {1!r} on {2} expression "
                              "that has no contained expression".format("warn_name_set_on_empty_Forward",
                                                                        name,
                                                                        type(self).__name__),
                              stacklevel=3)

        return super(Forward, self)._setResultsName(name, listAllMatches)
5093

5094
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # ``savelist`` is accepted but not used: it is not passed on to the
        # base class and saveAsList is always set to False here
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
5101

5102
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True

    def ignore(self, other):
        """Register an ignorable expression.  With ``adjacent`` set it is
        registered on this element only (``ParserElement.ignore``); otherwise
        the inherited ``ignore`` is used.  Returns self."""
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one joined string.  The result
        starts as a copy of ``tokenlist`` emptied in place, so named results
        on it are kept."""
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
5146

5147
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a, b, 100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a, b, 100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens in a one-element list."""
        grouped = [tokenlist]
        return grouped
5168

5169
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Give every non-empty entry of ``tokenlist`` a results name taken
        from its first token; the stored value is the rest of the entry."""
        for position, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # integer keys are stored under their stripped string form
                key = _ustr(entry[0]).strip()

            if size == 1:
                # key with no value
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                # simple key/value pair
                value = entry[1]
            else:
                remainder = entry.copy()  # ParseResults(i)
                del remainder[0]
                has_names = isinstance(remainder, ParseResults) and remainder.haskeys()
                if len(remainder) == 1 and not has_names:
                    # a single unnamed item is stored unwrapped
                    value = remainder[0]
                else:
                    value = remainder
            tokenlist[key] = _ParseResultsWithOffset(value, position)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
5235

5236

5237
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']

    (See also :class:`delimitedList`.)
    """
    def postParse(self, instring, loc, tokenlist):
        """Discard the matched tokens."""
        nothing = []
        return nothing

    def suppress(self):
        """Already suppressing; return this same element."""
        return self
5264

5265

5266
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.

    After one successful call, every further call raises ParseException
    until :meth:`reset` is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        if not self.called:
            results = self.callable(s, l, t)
            # marked only after the wrapped action returned, so an action
            # that raises can still be called again
            self.called = True
            return results
        # a non-empty message, so the failure can be told apart from others
        raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")

    def reset(self):
        """Allow the wrapped parse action to be called once more."""
        self.called = False
5280

5281
def traceParseAction(f):
    """Decorator that logs entry to and exit from a parse action.

    Each time the decorated parse action runs, a line of the form
    ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``
    is written to ``sys.stderr``.  When the action finishes, ``"<<"`` is
    written followed by either the returned value or the exception that
    the parse action raised (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional args are always (instring, loc, tokens)
        instring, loc, toks = paArgs[-3:]
        if len(paArgs) > 3:
            # a leading extra arg is the bound instance; qualify with its class
            owner = paArgs[0].__class__.__name__
            label = owner + '.' + label
        sys.stderr.write(">>entering %s(line: '%s', %d, %r)\n" % (label, line(loc, instring), loc, toks))
        try:
            result = f(*paArgs)
        except Exception as exc:
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        else:
            sys.stderr.write("<<leaving %s (ret: %r)\n" % (label, result))
            return result

    # make the wrapper report the wrapped function's name, where possible
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
5325

5326
#
5327
# global helpers
5328
#
5329
def delimitedList(expr, delim=",", combine=False):
    """Helper to build an expression for a list of ``expr`` separated by
    ``delim`` (a comma unless specified otherwise).

    Normally whitespace and comments may appear between the elements and
    the delimiters, the delimiters are suppressed, and the result is a
    list of the matched element tokens.  Passing ``combine=True`` instead
    returns the whole match, delimiters included, as one token string.

    Example::

        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    exprLabel = _ustr(expr)
    dlName = exprLabel + " [" + _ustr(delim) + " " + exprLabel + "]..."
    if not combine:
        # default form: drop the delimiters, keep the elements as separate tokens
        return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName)
    # combined form: keep the delimiters and glue everything into one token
    return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName)
5349

5350
def countedArray(expr, intExpr=None):
    """Helper to build an expression for a length-prefixed list.

    The generated expression matches input of the form::

        integer expr expr expr...

    in which the leading integer states how many ``expr`` items follow.
    The result is the list of ``expr`` tokens as a group; the count
    token itself does not appear in the output.

    ``intExpr``, if given, must be a pyparsing expression whose parse
    result is an integer; a copy of it is used to read the count.

    Example::

        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    if intExpr is None:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    else:
        # work on a copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    intExpr.setName("arrayLen")

    def countFieldParseAction(s, l, t):
        # once the count is known, define the array body as exactly that many exprs
        count = t[0]
        if count:
            arrayExpr << Group(And([expr] * count))
        else:
            arrayExpr << Group(empty)
        # returning an empty list removes the count from the results
        return []

    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return (intExpr + arrayExpr).setName('(len) ' + _ustr(expr) + '...')
5385

5386
def _flatten(L):
5387
    ret = []
5388
    for i in L:
5389
        if isinstance(i, list):
5390
            ret.extend(_flatten(i))
5391
        else:
5392
            ret.append(i)
5393
    return ret
5394

5395
def matchPreviousLiteral(expr):
    """Helper to build an expression that matches a literal repeat of
    whatever tokens ``expr`` matched earlier in the same parse.  For
    example::

        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Since the repeat is matched
    as a literal, this also matches the leading ``"1:1"`` in ``"1:10"``.
    Use :class:`matchPreviousExpr` if that is not wanted. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from the tokens that expr just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match each one literally, in order
            tflat = _flatten(t.asList())
            rep << And(Literal(tt) for tt in tflat)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
5424

5425
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.

    A mismatch raises :class:`ParseException` from the repeat
    expression's parse action, carrying the input string, the match
    location and the expected/found tokens.
    """
    rep = Forward()
    # the repeat is parsed with a copy of expr, then its tokens are compared
    e2 = expr.copy()
    rep <<= e2
    def copyTokenToRepeater(s, l, t):
        # remember what expr matched; nested token groups are flattened
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s, l, t):
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                # report where and why the repeat failed, rather than an
                # exception with no input string, location 0 and no message
                raise ParseException(s, l, "Expected %s, found %s" % (matchTokens, theseTokens))
        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
5453

5454
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters ``\\ ^ - [ ]`` in ``s`` and replace
    newline and tab characters with their ``\\n`` / ``\\t`` escape text."""
    # the backslash comes first in the loop so added escapes are not re-escaped
    for special in r"\^-[]":
        s = s.replace(special, _bslash + special)
    s = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(s)
5461

5462
def oneOf(strs, caseless=False, useRegex=True, asKeyword=False):
    """Helper to quickly define a set of alternative literals.  The
    alternatives are reordered where necessary so that a shorter literal
    never hides a longer one that starts with it, whatever order they
    were given in, while still returning a :class:`MatchFirst` (or an
    equivalent :class:`Regex`) for best performance.

    Parameters:

     - strs - a string of space-delimited literals, or a collection of
       string literals
     - caseless - (default= ``False``) - treat all literals as
       caseless
     - useRegex - (default= ``True``) - as an optimization, will
       generate a Regex object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
       creating a :class:`Regex` raises an exception)
     - asKeyword - (default=``False``) - enforce Keyword-style matching on the
       generated expressions

    Example::

        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # a string in the caseless slot almost certainly means oneOf("a", "b") was written
    if isinstance(caseless, basestring):
        warnings.warn("More than one string argument passed to oneOf, pass "
                      "choices as a list or space-delimited string", stacklevel=2)

    # choose the comparison functions and element class for the requested mode
    if caseless:
        def isequal(a, b):
            return a.upper() == b.upper()

        def masks(a, b):
            return b.upper().startswith(a.upper())

        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        def isequal(a, b):
            return a == b

        def masks(a, b):
            return b.startswith(a)

        parseElementClass = Keyword if asKeyword else Literal

    # normalize the choices to a list of strings
    symbols = []
    if isinstance(strs, basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                      SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    if not asKeyword:
        # if not producing keywords, need to reorder to take care to avoid masking
        # longer choices with shorter ones
        pos = 0
        while pos < len(symbols) - 1:
            cur = symbols[pos]
            for later in range(pos + 1, len(symbols)):
                other = symbols[later]
                if isequal(other, cur):
                    # duplicate of cur: drop it and re-examine the same position
                    del symbols[later]
                    break
                if masks(cur, other):
                    # cur is a prefix of other: move other in front of cur
                    del symbols[later]
                    symbols.insert(pos, other)
                    break
            else:
                # nothing after cur conflicts with it; advance
                pos += 1

    if useRegex and not (caseless or asKeyword):
        exprName = ' | '.join(symbols)
        try:
            if len(symbols) == len("".join(symbols)):
                # every choice is a single character: a character class will do
                charClass = "".join(_escapeRegexRangeChars(sym) for sym in symbols)
                return Regex("[%s]" % charClass).setName(exprName)
            else:
                alternation = "|".join(re.escape(sym) for sym in symbols)
                return Regex(alternation).setName(exprName)
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
5548

5549
def dictOf(key, value):
    """Helper to define a dictionary-style result from a key pattern and
    a value pattern, without having to spell out the :class:`Dict`,
    :class:`OneOrMore` and :class:`Group` nesting by hand.  The key
    pattern may contain delimiters or punctuation provided they are
    suppressed, so that only the significant key text remains.  The
    value pattern may carry results names, in which case the
    :class:`Dict` results expose those named fields too.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # each key/value pair is grouped so Dict can use its first token as the key
    entry = Group(key + value)
    entries = OneOrMore(entry)
    return Dict(entries)
5587

5588
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized input text matched by
    an expression.  Useful for turning the parsed fields of an HTML
    start tag back into the raw tag text, or for recovering the exact
    input (including intervening whitespace) that a multi-token
    expression matched.  By default, returns a string containing the
    original parsed text.

    If ``asString`` is passed as ``False``, the result is instead
    a :class:`ParseResults` holding a single token with the original
    matched text, plus any results names that were matched inside
    ``expr``.  So if ``expr`` contains expressions with results names
    and you want to keep those named values, you must pass
    ``asString=False``.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers that record the parse location before and after expr
    locMarker = Empty().setParseAction(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    # the end marker must not skip ahead before recording its position
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    if asString:
        def extractText(s, l, t):
            return s[t._original_start: t._original_end]
    else:
        def extractText(s, l, t):
            # remove the helper names and replace the token list with the raw slice
            startLoc = t.pop('_original_start')
            endLoc = t.pop('_original_end')
            t[:] = [s[startLoc:endLoc]]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
5629

5630
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions:
    the result of ``expr`` is replaced by its first token.
    """
    def firstToken(t):
        return t[0]

    converter = TokenConverter(expr)
    return converter.addParseAction(firstToken)
5635

5636
def locatedExpr(expr):
    """Helper to wrap the tokens of ``expr`` with the input locations
    where the match starts and ends.

    The returned group defines these results names:

     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose only token is the current parse location
    locator = Empty().setParseAction(lambda s, l, t: l)
    startMarker = locator("locn_start")
    valueExpr = expr("value")
    # the end marker must not skip whitespace, or it would report a later location
    endMarker = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMarker + valueExpr + endMarker)
5663

5664

5665
# ready-made, named instances of the zero-width positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
5671

5672
# Mini-grammar for the regex-style bracket expressions accepted by srange().
# a backslash followed by one punctuation character stands for that character
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(lambda s, l, t: t[0][1])
# \x## (or the legacy \0x##) hex escape. The digits are taken explicitly from
# after the x/X marker: str.lstrip(r'\0x') strips a *set* of characters, which
# reduced "\x00" to an empty string and left the "X" of "\X41" in place, so
# int(..., 16) raised ValueError for both.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s, l, t: unichr(int(t[0].lower().partition('x')[2], 16)))
# \0## octal escape; int() is given everything after the backslash
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s, l, t: unichr(int(t[0][1:], 8)))
# any one of the escapes above, or a single char other than backslash or ']'
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# "a-z" style range, grouped as [first, last]
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") + "]"
5678

5679
def srange(s):
    r"""Helper to expand a regexp-style ``[]`` character set into the
    string of all characters it names, for use when constructing a
    Word::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. If the
    input cannot be processed for any reason, an empty string is
    returned. The values enclosed in the []'s may be:

     - a single character
     - an escaped character with a leading backslash (such as ``\-``
       or ``\]``)
     - an escaped hex character with a leading ``'\x'``
       (``\x21``, which is a ``'!'`` character) (``\0x##``
       is also supported for backwards compatibility)
     - an escaped octal character with a leading ``'\0'``
       (``\041``, which is a ``'!'`` character)
     - a range of any of the above, separated by a dash (``'a-z'``,
       etc.)
     - any combination of the above (``'aeiouy'``,
       ``'a-zA-Z0-9_$'``, etc.)
    """
    def _expanded(p):
        # a grouped [first, last] pair is a range; anything else is a single char
        if isinstance(p, ParseResults):
            return ''.join(unichr(c) for c in range(ord(p[0]), ord(p[1]) + 1))
        return p

    try:
        body = _reBracketExpr.parseString(s).body
        return "".join(_expanded(part) for part in body)
    except Exception:
        # best effort: anything that cannot be processed yields no characters
        return ""
5710

5711
def matchOnlyAtCol(n):
    """Helper that returns a parse action which only accepts a match
    starting at column ``n`` of the input text; a match at any other
    column raises :class:`ParseException`.
    """
    def verifyCol(strg, locn, toks):
        actualCol = col(locn, strg)
        if actualCol != n:
            raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol
5719

5720
def replaceWith(replStr):
    """Helper that returns a parse action which ignores the matched
    tokens and always produces the single value ``replStr``.
    Especially useful when used with
    :class:`transformString<ParserElement.transformString>` ().

    Example::

        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    def _replacer(s, l, t):
        # a fresh one-element list on every call
        return [replStr]

    return _replacer
5734

5735
def removeQuotes(s, l, t):
    """Helper parse action that strips the first and last character
    (the quotation marks) from the first token of a parsed quoted
    string.

    Example::

        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
5749

5750
def tokenMap(func, *args):
    """Helper to define a parse action that applies ``func`` to every
    element of a ParseResults list. Any additional args are passed to
    ``func`` after the token, as in
    ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    Example (compare the last to example in :class:`ParserElement.transformString`)::

        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s, l, t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    # name the parse action after the mapped function (or its class, for
    # callables without a __name__), falling back to str(func)
    try:
        fallbackName = func.__class__.__name__
        func_name = getattr(func, '__name__', fallbackName)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
5797

5798
# each token is converted with _ustr() before its case is changed
upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case.
Deprecated in favor of :class:`pyparsing_common.upcaseTokens`"""

downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case.
Deprecated in favor of :class:`pyparsing_common.downcaseTokens`"""
5805

5806
def _makeTags(tagStr, xml,
              suppress_LT=Suppress("<"),
              suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name

    ``tagStr`` is either the tag name as a string or an existing
    expression (whose ``name`` is then used as the tag name).  ``xml``
    selects the XML flavour of the grammar when true and the HTML
    flavour otherwise.  Returns the ``(openTag, closeTag)`` pair.
    """
    if not isinstance(tagStr, basestring):
        resname = tagStr.name
    else:
        resname = tagStr
        # a string tag name is matched caselessly for HTML only
        tagStr = Keyword(tagStr, caseless=not xml)

    # results-name suffix built from the tag name, e.g. "ns:tag" -> "NsTag"
    nameSuffix = "".join(resname.replace(":", " ").title().split())

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML flavour: double-quoted values, and every attribute has one
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        tagAttr = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        # HTML flavour: any quoting or none, value optional, names lowercased
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printables, excludeChars=">")
        tagAttr = Group(tagAttrName.setParseAction(downcaseTokens)
                        + Optional(Suppress("=") + tagAttrValue))

    # "empty" is True when the tag ends with '/', False otherwise
    emptyFlag = Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/')
    openTag = (suppress_LT
               + tagStr("tag")
               + Dict(ZeroOrMore(tagAttr))
               + emptyFlag
               + suppress_GT)
    closeTag = Combine(_L("</") + tagStr + ">", adjacent=False)

    openTag.setName("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.addParseAction(lambda t: t.__setitem__("start" + nameSuffix, t.copy()))
    closeTag = closeTag("end" + nameSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
5842

5843
def makeHTMLTags(tagStr):
    """Helper to build the opening and closing tag expressions for an
    HTML tag name. The expressions match the tag in either upper or
    lower case, and accept attributes with namespaces and with quoted or
    unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    return _makeTags(tagStr, xml=False)
5866

5867
def makeXMLTags(tagStr):
    """Helper to build the opening and closing tag expressions for an
    XML tag name. The tag is matched only in the upper/lower case given.

    Example: similar to :class:`makeHTMLTags`
    """
    return _makeTags(tagStr, xml=True)
5874

5875
def withAttribute(*args, **attrDict):
    """Helper to create a validating parse action for start tags built
    with :class:`makeXMLTags` or :class:`makeHTMLTags`. Use
    ``withAttribute`` to require particular attribute values on a start
    tag, so that common tags such as ``<TD>`` or ``<DIV>`` do not give
    false matches.

    Call ``withAttribute`` with a series of attribute names and
    values. Specify the list of filter attributes names and values as:

     - keyword arguments, as in ``(align="right")``, or
     - as an explicit dict with ``**`` operator, when an attribute
       name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
     - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    Attribute names with a namespace prefix cannot be written as plain
    keyword arguments; use one of the last two forms for them.  If any
    positional name-value tuples are given, keyword arguments are not
    used.

    If just testing for ``class`` (with or without a namespace), use
    :class:`withClass`.

    To verify that the attribute exists, but without specifying a value,
    pass ``withAttribute.ANY_VALUE`` as the value.

    Example::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional name/value tuples take precedence over keyword arguments
    pairs = args[:] if args else attrDict.items()
    # freeze into a list of 2-tuples now; malformed entries fail here, not at parse time
    attrs = [(name, value) for name, value in pairs]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                found = tokens[attrName]
                if found != attrValue:
                    raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, found, attrValue))

    return pa

# sentinel value: the attribute must be present, whatever its value
withAttribute.ANY_VALUE = object()
5945

5946
def withClass(classname, namespace=''):
    """Simplified version of :class:`withAttribute` for matching on the
    ``class`` attribute of a tag, which is awkward to pass as a keyword
    argument because ``class`` is a reserved word in Python.  If
    ``namespace`` is given, the attribute tested is
    ``<namespace>:class``.

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr: classname})
5983

5984
# namespace holding two distinct sentinel objects, LEFT and RIGHT
opAssoc = SimpleNamespace()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
5987

5988
def infixNotation(baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')')):
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary
    or binary, left- or right-associative.  Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infixNotation. See
    :class:`ParserElement.enablePackrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand to be
       used in the nested expression
     - opList - list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(opExpr,
       numTerms, rightLeftAssoc, parseAction)``, where:

       - opExpr is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if numTerms
         is 3, opExpr is a tuple (or list) of two expressions, for the
         two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must be 1,
         2, or 3)
       - rightLeftAssoc is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``opAssoc.RIGHT`` and ``opAssoc.LEFT``.
       - parseAction is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``setParseAction(*fn)``
         (:class:`ParserElement.setParseAction`)
     - lpar - expression for matching left-parentheses
       (default= ``Suppress('(')``)
     - rpar - expression for matching right-parentheses
       (default= ``Suppress(')')``)

    Raises ``ValueError`` if an operator definition has numTerms other
    than 1, 2 or 3, an unknown associativity, or (for numTerms=3) an
    opExpr that is not a sequence of exactly two expressions.

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.tryParse(instring, loc)
            return loc, []

    ret = Forward()
    # the tightest-binding level: a bare operand or a parenthesized expression
    lastExpr = baseExpr | (lpar + ret + rpar)
    for operDef in opList:
        # pad with None so the optional parse action may be omitted;
        # tuple() lets a precedence level be given as a list as well
        opExpr, arity, rightLeftAssoc, pa = (tuple(operDef) + (None, ))[:4]
        if arity == 3:
            # validate before formatting the name, so a bad opExpr produces
            # the ValueError below rather than a TypeError from '%'
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr))
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr))
                else:
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr + OneOrMore(lastExpr))
            elif arity == 3:
                matchExpr = (_FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr)
                             + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)))
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr))
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr))
            elif arity == 3:
                matchExpr = (_FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
                             + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr))
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # a term at this level is either an operator match or falls through
        # to the next-tighter level
        thisExpr <<= (matchExpr.setName(termName) | lastExpr)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
6116

6117
operatorPrecedence = infixNotation
"""(Deprecated) Former name of :class:`infixNotation`, will be
dropped in a future release."""

# Quoted-string expressions.  In each pattern the string body may contain:
# any character other than the quote, newline, carriage return or backslash;
# a doubled quote character; or a backslash escape (a backslash followed by
# any non-'x' character, or by 'x' and one or more hex digits).  The closing
# quote is matched as a separate literal and merged back in by Combine.
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).setName("string enclosed in double quotes")
sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("string enclosed in single quotes")
quotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("quotedString using single or double quotes")
# a quoted string carrying a leading 'u' prefix
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
6126

6127
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Build an expression that matches nested lists wrapped in opening
    and closing delimiters (``"("`` and ``")"`` unless told otherwise).

    Parameters:
     - opener - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression
     - closer - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing
       delimiters (default= :class:`quotedString`)

    When no ``content`` expression is supplied, everything between the
    delimiters is captured as a list of whitespace-delimited values; in
    that case ``opener`` and ``closer`` must both be strings.

    ``ignoreExpr`` names expressions that may themselves contain the
    delimiter characters without those characters counting as nesting -
    quotedString or a comment expression, for instance.  Combine several
    with an :class:`Or` or :class:`MatchFirst`.  The default is
    :class:`quotedString`; pass ``None`` if nothing should be ignored.

    Raises ``ValueError`` if ``opener`` equals ``closer``, or if
    ``content`` is omitted and either delimiter is not a string.

    Example::

        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        delimiters_are_strings = isinstance(opener, basestring) and isinstance(closer, basestring)
        if not delimiters_are_strings:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        white = ParserElement.DEFAULT_WHITE_CHARS
        single_char_delimiters = len(opener) == 1 and len(closer) == 1
        if single_char_delimiters:
            # one-character delimiters can simply be excluded by CharsNotIn
            if ignoreExpr is None:
                content = (empty.copy()
                           + CharsNotIn(opener + closer + white).setParseAction(lambda t: t[0].strip()))
            else:
                content = (Combine(OneOrMore(~ignoreExpr
                                             + CharsNotIn(opener + closer + white, exact=1)))
                           .setParseAction(lambda t: t[0].strip()))
        else:
            # longer delimiters need negative lookaheads in front of each character
            if ignoreExpr is None:
                content = (Combine(OneOrMore(~Literal(opener)
                                             + ~Literal(closer)
                                             + CharsNotIn(white, exact=1)))
                           .setParseAction(lambda t: t[0].strip()))
            else:
                content = (Combine(OneOrMore(~ignoreExpr
                                             + ~Literal(opener)
                                             + ~Literal(closer)
                                             + CharsNotIn(white, exact=1)))
                           .setParseAction(lambda t: t[0].strip()))

    ret = Forward()
    # what may appear between the delimiters: ignorable text (if any),
    # a nested list, or a content item - tried in that order
    if ignoreExpr is None:
        inner = ret | content
    else:
        inner = ignoreExpr | ret | content
    ret <<= Group(Suppress(opener) + ZeroOrMore(inner) + Suppress(closer))
    ret.setName('nested %s%s expression' % (opener, closer))
    return ret
6230

6231
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Build an expression for a block of statements delimited by
    indentation, in the manner of block statements in Python source.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single
       grammar should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    Note that ``blockStatementExpr`` is modified in place: an ignore
    expression is attached to it.

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Optional(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # snapshot of the stack as it is when the expression is built; the fail
    # action below restores the caller's list to this state
    backup_stack = indentStack[:]

    def reset_stack():
        indentStack[:] = backup_stack

    def checkPeerIndent(instring, loc, toks):
        # nothing to verify once the end of the input has been reached
        if loc >= len(instring):
            return
        column = col(loc, instring)
        expected = indentStack[-1]
        if column == expected:
            return
        if column > expected:
            raise ParseException(instring, loc, "illegal nesting")
        raise ParseException(instring, loc, "not a peer entry")

    def checkSubIndent(instring, loc, toks):
        column = col(loc, instring)
        if column <= indentStack[-1]:
            raise ParseException(instring, loc, "not a subentry")
        # deeper than the enclosing level: this column becomes the new level
        indentStack.append(column)

    def checkUnindent(instring, loc, toks):
        if loc >= len(instring):
            return
        column = col(loc, instring)
        if not indentStack or column not in indentStack:
            raise ParseException(instring, loc, "not an unindent")
        if column < indentStack[-1]:
            indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress(), stopOn=StringEnd())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # optional leading newlines, plus the indent check when the block is
    # required to sit deeper than the current level
    opening = Optional(NL)
    if indent:
        opening = opening + INDENT
    statements = OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd())
    smExpr = Group(opening + statements + UNDENT)

    smExpr.setFailAction(lambda a, b, c, d: reset_stack())
    # NOTE(review): presumably lets statements continue across lines ending
    # in a backslash; _bslash is defined elsewhere in the module - confirm
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
6356

6357
# Character sets from the 8-bit range 0xa1-0xff, expanded by srange:
# alphas8bit covers 0xc0-0xff apart from 0xd7 and 0xf7, which are placed
# in punc8bit together with 0xa1-0xbf.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions for a tag of any name
anyOpenTag, anyCloseTag = makeHTMLTags(
    Word(alphas, alphanums + "_:").setName('any tag')
)

# entity name -> replacement character, paired up positionally
_htmlEntityMap = dict(zip(
    "gt lt amp nbsp quot apos".split(),
    '><& "\'',
))
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) + ");"
).setName("common HTML entity")
6363
def replaceHTMLEntity(t):
    """Parse action that swaps a matched common HTML entity for the
    character it stands for.

    The entity name is read from the ``entity`` results name of ``t``
    and looked up in ``_htmlEntityMap``; ``None`` is returned when the
    name has no entry.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
6366

6367
# Comment expressions are needed often and are easy to get subtly wrong,
# so ready-made versions are provided here.
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form ``/* ... */``"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form ``<!-- ... -->``"

# everything up to the end of the current line, leading whitespace included
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# a backslash directly before the newline continues the comment onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form ``// ... (to end of line)``"

cppStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
    | dblSlashComment
).setName("C++ style comment")
"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`"

javaStyleComment = cppStyleComment
"Same as :class:`cppStyleComment`"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form ``# ... (to end of line)``"

# one item of a comma-separated list: runs of printable non-comma characters,
# with embedded spaces/tabs kept only when not followed by a comma or line end
_commasepitem = Combine(
    OneOrMore(Word(printables, excludeChars=',')
              + Optional(Word(" \t")
                         + ~Literal(",") + ~LineEnd()))
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional(quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or
quoted strings, separated by commas.

This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`.
"""
6396

6397
# some other useful expressions - using lower-case class name since we are really using this as a namespace
6398
class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

     - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
       :class:`scientific notation<sci_real>`)
     - common :class:`programming identifiers<identifier>`
     - network addresses (:class:`MAC<mac_address>`,
       :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
     - ISO8601 :class:`dates<iso8601_date>` and
       :class:`datetime<iso8601_datetime>`
     - :class:`UUID<uuid>`
     - :class:`comma-separated list<comma_separated_list>`

    Parse actions:

     - :class:`convertToInteger`
     - :class:`convertToFloat`
     - :class:`convertToDate`
     - :class:`convertToDatetime`
     - :class:`stripHTMLTags`
     - :class:`upcaseTokens`
     - :class:`downcaseTokens`

    Example::

        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    prints::

        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int, 16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # collapse [numerator, '/', denominator] into the single quotient
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer;
    returns the sum of the parsed parts (an int when no fraction is present,
    otherwise a float)"""
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?(?:\d+\.\d*|\.\d+)').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sci_real = Regex(r'[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional
    scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas + '_', alphanums + '_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part) * 7).setName("full IPv6 address")
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
                           + "::"
                           + Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
                           ).setName("short IPv6 address")
    # the short form must contain fewer than 8 hex groups in total
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the delimiter captured after the first byte must be reused (\1) for the rest
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        The returned parse action raises :class:`ParseException` when the
        matched text does not conform to ``fmt``.

        Example::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))

        prints::

            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                # report a bad date as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        The returned parse action raises :class:`ParseException` when the
        matched text does not conform to ``fmt``.

        Example::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))

        prints::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # report a bad datetime as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``); the month and day parts are optional"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    # matches any opening or closing tag and suppresses it from the output
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """Parse action to remove HTML tags from web page HTML source

        Example::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
            print(table_text.parseString(text).body)

        Prints::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    _commasepitem = Combine(OneOrMore(~Literal(",")
                                      + ~LineEnd()
                                      + Word(printables, excludeChars=',')
                                      + Optional(White(" \t")))).streamline().setName("commaItem")
    comma_separated_list = delimitedList(Optional(quotedString.copy()
                                                  | _commasepitem, default='')
                                         ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
6696

6697

6698
class _lazyclassproperty(object):
6699
    def __init__(self, fn):
6700
        self.fn = fn
6701
        self.__doc__ = fn.__doc__
6702
        self.__name__ = fn.__name__
6703

6704
    def __get__(self, obj, cls):
6705
        if cls is None:
6706
            cls = type(obj)
6707
        if not hasattr(cls, '_intern') or any(cls._intern is getattr(superclass, '_intern', [])
6708
                                              for superclass in cls.__mro__[1:]):
6709
            cls._intern = {}
6710
        attrname = self.fn.__name__
6711
        if attrname not in cls._intern:
6712
            cls._intern[attrname] = self.fn(cls)
6713
        return cls._intern[attrname]
6714

6715

6716
class unicode_set(object):
    """
    Base class describing a set of Unicode characters, used to build
    language-specific versions of ``alphas``, ``nums``, ``alphanums``,
    and ``printables``.

    Subclasses list their code point ranges in the class attribute
    ``_ranges``; each entry is a tuple whose first and last items are the
    inclusive bounds of a range, for example::

        _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]

    Sets can also be combined through multiple inheritance::

        class CJK(Chinese, Japanese, Korean):
            pass
    """
    _ranges = []

    @classmethod
    def _get_chars_for_ranges(cls):
        """Return the sorted, de-duplicated characters of this class and its unicode_set bases."""
        code_points = set()
        # walk the MRO only up to (not including) unicode_set itself
        for klass in cls.__mro__:
            if klass is unicode_set:
                break
            for bounds in klass._ranges:
                # bounds[-1] lets a one-element tuple stand for a single code point
                code_points.update(range(bounds[0], bounds[-1] + 1))
        return [unichr(code_point) for code_point in sorted(code_points)]

    @_lazyclassproperty
    def printables(cls):
        "all non-whitespace characters in this range"
        return u''.join(
            ch for ch in cls._get_chars_for_ranges() if not unicode.isspace(ch)
        )

    @_lazyclassproperty
    def alphas(cls):
        "all alphabetic characters in this range"
        return u''.join(
            ch for ch in cls._get_chars_for_ranges() if unicode.isalpha(ch)
        )

    @_lazyclassproperty
    def nums(cls):
        "all numeric digit characters in this range"
        return u''.join(
            ch for ch in cls._get_chars_for_ranges() if unicode.isdigit(ch)
        )

    @_lazyclassproperty
    def alphanums(cls):
        "all alphanumeric characters in this range"
        letters = cls.alphas
        digits = cls.nums
        return letters + digits
6761

6762

6763
class pyparsing_unicode(unicode_set):
    """
    Namespace class collecting unicode_set definitions for common languages
    and scripts. Used directly, it covers every code point from the space
    character up to ``sys.maxunicode``.
    """
    _ranges = [(32, sys.maxunicode)]

    class Latin1(unicode_set):
        "Unicode set for Latin-1 Unicode Character Range"
        _ranges = [
            (0x0020, 0x007e),
            (0x00a0, 0x00ff),
        ]

    class LatinA(unicode_set):
        "Unicode set for Latin-A Unicode Character Range"
        _ranges = [
            (0x0100, 0x017f),
        ]

    class LatinB(unicode_set):
        "Unicode set for Latin-B Unicode Character Range"
        _ranges = [
            (0x0180, 0x024f),
        ]

    class Greek(unicode_set):
        "Unicode set for Greek Unicode Character Ranges"
        # one-element tuples denote a single code point
        _ranges = [
            (0x0370, 0x03ff),
            (0x1f00, 0x1f15),
            (0x1f18, 0x1f1d),
            (0x1f20, 0x1f45),
            (0x1f48, 0x1f4d),
            (0x1f50, 0x1f57),
            (0x1f59,),
            (0x1f5b,),
            (0x1f5d,),
            (0x1f5f, 0x1f7d),
            (0x1f80, 0x1fb4),
            (0x1fb6, 0x1fc4),
            (0x1fc6, 0x1fd3),
            (0x1fd6, 0x1fdb),
            (0x1fdd, 0x1fef),
            (0x1ff2, 0x1ff4),
            (0x1ff6, 0x1ffe),
        ]

    class Cyrillic(unicode_set):
        "Unicode set for Cyrillic Unicode Character Range"
        _ranges = [
            (0x0400, 0x04ff),
        ]

    class Chinese(unicode_set):
        "Unicode set for Chinese Unicode Character Range"
        _ranges = [
            (0x4e00, 0x9fff),
            (0x3000, 0x303f),
        ]

    class Japanese(unicode_set):
        "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
        # left empty in the class body; the nested sets below carry the ranges
        _ranges = []

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges = [
                (0x4e00, 0x9fbf),
                (0x3000, 0x303f),
            ]

        class Hiragana(unicode_set):
            "Unicode set for Hiragana Unicode Character Range"
            _ranges = [
                (0x3040, 0x309f),
            ]

        class Katakana(unicode_set):
            "Unicode set for Katakana  Unicode Character Range"
            _ranges = [
                (0x30a0, 0x30ff),
            ]

    class Korean(unicode_set):
        "Unicode set for Korean Unicode Character Range"
        _ranges = [
            (0xac00, 0xd7af),
            (0x1100, 0x11ff),
            (0x3130, 0x318f),
            (0xa960, 0xa97f),
            (0xd7b0, 0xd7ff),
            (0x3000, 0x303f),
        ]

    class CJK(Chinese, Japanese, Korean):
        "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"

    class Thai(unicode_set):
        "Unicode set for Thai Unicode Character Range"
        _ranges = [
            (0x0e01, 0x0e3a),
            (0x0e3f, 0x0e5b),
        ]

    class Arabic(unicode_set):
        "Unicode set for Arabic Unicode Character Range"
        _ranges = [
            (0x0600, 0x061b),
            (0x061e, 0x06ff),
            (0x0700, 0x077f),
        ]

    class Hebrew(unicode_set):
        "Unicode set for Hebrew Unicode Character Range"
        _ranges = [
            (0x0590, 0x05ff),
        ]

    class Devanagari(unicode_set):
        "Unicode set for Devanagari Unicode Character Range"
        _ranges = [
            (0x0900, 0x097f),
            (0xa8e0, 0xa8ff),
        ]
6836

6837
# Japanese is given the concatenation of its three nested script ranges.
pyparsing_unicode.Japanese._ranges = (
    pyparsing_unicode.Japanese.Kanji._ranges
    + pyparsing_unicode.Japanese.Hiragana._ranges
    + pyparsing_unicode.Japanese.Katakana._ranges
)

# define ranges in language character sets
if PY_3:
    # (namespace to attach to, native-language attribute name, character set)
    for _owner, _native_name, _charset in (
        (pyparsing_unicode, u"العربية", pyparsing_unicode.Arabic),
        (pyparsing_unicode, u"中文", pyparsing_unicode.Chinese),
        (pyparsing_unicode, u"кириллица", pyparsing_unicode.Cyrillic),
        (pyparsing_unicode, u"Ελληνικά", pyparsing_unicode.Greek),
        (pyparsing_unicode, u"עִברִית", pyparsing_unicode.Hebrew),
        (pyparsing_unicode, u"日本語", pyparsing_unicode.Japanese),
        (pyparsing_unicode.Japanese, u"漢字", pyparsing_unicode.Japanese.Kanji),
        (pyparsing_unicode.Japanese, u"カタカナ", pyparsing_unicode.Japanese.Katakana),
        (pyparsing_unicode.Japanese, u"ひらがな", pyparsing_unicode.Japanese.Hiragana),
        (pyparsing_unicode, u"한국어", pyparsing_unicode.Korean),
        (pyparsing_unicode, u"ไทย", pyparsing_unicode.Thai),
        (pyparsing_unicode, u"देवनागरी", pyparsing_unicode.Devanagari),
    ):
        setattr(_owner, _native_name, _charset)
    # keep the loop temporaries out of the module namespace
    del _owner, _native_name, _charset
6855

6856

6857
class pyparsing_test:
6858
    """
6859
    namespace class for classes useful in writing unit tests
6860
    """
6861

6862
    class reset_pyparsing_context:
        """
        Context manager to be used when writing unit tests that modify pyparsing config values:
         - packrat parsing
         - default whitespace characters.
         - default keyword characters
         - literal string auto-conversion class
         - __diag__ settings
         - __compat__ settings

        Example:
            with reset_pyparsing_context():
                # test that literals used to construct a grammar are automatically suppressed
                ParserElement.inlineLiteralsUsing(Suppress)

                term = Word(alphas) | Word(nums)
                group = Group('(' + term[...] + ')')

                # assert that the '()' characters are not included in the parsed tokens
                self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def'])

            # after exiting context manager, literals are converted to Literal expressions again
        """

        def __init__(self):
            # snapshot of the global settings, filled in by save()
            self._save_context = {}

        def save(self):
            """
            Record the current pyparsing global settings so that restore()
            can put them back later.

            Returns this object, so it can be used as the ``with`` target.
            """
            self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
            self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS
            self._save_context[
                "literal_string_class"
            ] = ParserElement._literalStringClass
            self._save_context["packrat_enabled"] = ParserElement._packratEnabled
            self._save_context["packrat_parse"] = ParserElement._parse
            self._save_context["__diag__"] = {
                name: getattr(__diag__, name) for name in __diag__._all_names
            }
            self._save_context["__compat__"] = {
                "collect_all_And_tokens": __compat__.collect_all_And_tokens
            }
            return self

        def restore(self):
            """
            Reset the pyparsing global settings to the values recorded by save().

            Raises KeyError if save() has not been called first.
            """
            # reset pyparsing global state
            if (
                ParserElement.DEFAULT_WHITE_CHARS
                != self._save_context["default_whitespace"]
            ):
                ParserElement.setDefaultWhitespaceChars(
                    self._save_context["default_whitespace"]
                )
            Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"]
            ParserElement.inlineLiteralsUsing(
                self._save_context["literal_string_class"]
            )
            for name, value in self._save_context["__diag__"].items():
                setattr(__diag__, name, value)
            ParserElement._packratEnabled = self._save_context["packrat_enabled"]
            ParserElement._parse = self._save_context["packrat_parse"]
            # The saved "__compat__" entry is a dict of {setting name: value}.
            # Restore each setting individually; assigning the dict itself to
            # collect_all_And_tokens would leave that flag truthy even when the
            # saved value was False.
            for name, value in self._save_context["__compat__"].items():
                setattr(__compat__, name, value)

        def __enter__(self):
            return self.save()

        def __exit__(self, *args):
            # restore() returns None, so exceptions raised in the with-body propagate
            return self.restore()
6928

6929
    class TestParseResultsAsserts:
        """
        Mixin that adds ParseResults-oriented assertion methods to ordinary
        unittest.TestCase classes.
        """
        def assertParseResultsEquals(
            self, result, expected_list=None, expected_dict=None, msg=None
        ):
            """
            Assert that a ParseResults object matches the given expectations.

            ``result.asList()`` is compared with expected_list and
            ``result.asDict()`` with expected_dict; an expectation left as
            None is not checked.
            """
            comparisons = (
                (expected_list, "asList"),
                (expected_dict, "asDict"),
            )
            for expected, converter in comparisons:
                if expected is not None:
                    self.assertEqual(expected, getattr(result, converter)(), msg=msg)

        def assertParseAndCheckList(
            self, expr, test_string, expected_list, msg=None, verbose=True
        ):
            """
            Parse test_string in full with expr and assert that the resulting
            ParseResults.asList() equals expected_list. When verbose is true
            the dump of the parse results is printed first.
            """
            parsed = expr.parseString(test_string, parseAll=True)
            if verbose:
                print(parsed.dump())
            self.assertParseResultsEquals(parsed, expected_list=expected_list, msg=msg)

        def assertParseAndCheckDict(
            self, expr, test_string, expected_dict, msg=None, verbose=True
        ):
            """
            Parse test_string in full with expr and assert that the resulting
            ParseResults.asDict() equals expected_dict. When verbose is true
            the dump of the parse results is printed first.
            """
            parsed = expr.parseString(test_string, parseAll=True)
            if verbose:
                print(parsed.dump())
            self.assertParseResultsEquals(parsed, expected_dict=expected_dict, msg=msg)

        def assertRunTestResults(
            self, run_tests_report, expected_parse_results=None, msg=None
        ):
            """
            Assert on the output of ParserElement.runTests().

            When expected_parse_results is given, its entries are zipped with
            the report tuples from runTests and each pair is validated: an
            entry naming an exception class requires the result to be such an
            exception, otherwise any list and/or dict in the entry is checked
            with assertParseResultsEquals. Lastly, the overall runTests()
            success value is asserted to be True.

            :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests
            :param expected_parse_results (optional): [tuple(str, list, dict, Exception)]
            """
            run_test_success, run_test_results = run_tests_report

            def first_where(items, accept):
                # first element of items satisfying accept, or None
                return next((item for item in items if accept(item)), None)

            def is_exception_class(item):
                return isinstance(item, type) and issubclass(item, Exception)

            if expected_parse_results is None:
                cases = []
            else:
                cases = [
                    (report[0], report[1], spec)
                    for report, spec in zip(run_test_results, expected_parse_results)
                ]

            for test_string, outcome, spec in cases:
                # spec should be a tuple containing a list and/or a dict or an
                # exception class, plus an optional failure message string;
                # an empty tuple skips validation of this result
                failure_text = first_where(spec, lambda item: isinstance(item, str)) or msg
                wanted_exception = first_where(spec, is_exception_class)

                if wanted_exception is None:
                    wanted_list = first_where(spec, lambda item: isinstance(item, list))
                    wanted_dict = first_where(spec, lambda item: isinstance(item, dict))
                    if wanted_list is None and wanted_dict is None:
                        # warning here maybe?
                        print("no validation for {!r}".format(test_string))
                    else:
                        self.assertParseResultsEquals(
                            outcome,
                            expected_list=wanted_list,
                            expected_dict=wanted_dict,
                            msg=failure_text,
                        )
                    continue

                with self.assertRaises(
                    expected_exception=wanted_exception, msg=failure_text
                ):
                    # re-raise the captured exception so assertRaises can check its type
                    if isinstance(outcome, Exception):
                        raise outcome

            # do this last, in case some specific test results can be reported instead
            overall_msg = "failed runTests" if msg is None else msg
            self.assertTrue(run_test_success, msg=overall_msg)

        @contextmanager
        def assertRaisesParseException(self, exc_type=ParseException, msg=None):
            """Context manager asserting that its body raises exc_type (ParseException by default)."""
            with self.assertRaises(exc_type, msg=msg):
                yield
7036

7037

7038
if __name__ == "__main__":
    # Small self-demo: a toy "SELECT ... FROM ..." grammar followed by a few of
    # the pyparsing_common expressions, each exercised through runTests().

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into a single token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = (
        selectToken("command")
        + columnSpec("columns")
        + fromToken
        + tableNameList("tables")
    )

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # the same numeric samples are fed to both number expressions
    numeric_samples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """

    pyparsing_common.number.runTests(numeric_samples)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests(numeric_samples)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    # convert matched UUID strings into uuid.UUID objects
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
7108

7109
Product

Resources

Company