Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hhhrrrttt222111
GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/pip/_vendor/pyparsing.py
811 views
1
# -*- coding: utf-8 -*-
2
# module pyparsing.py
3
#
4
# Copyright (c) 2003-2019 Paul T. McGuire
5
#
6
# Permission is hereby granted, free of charge, to any person obtaining
7
# a copy of this software and associated documentation files (the
8
# "Software"), to deal in the Software without restriction, including
9
# without limitation the rights to use, copy, modify, merge, publish,
10
# distribute, sublicense, and/or sell copies of the Software, and to
11
# permit persons to whom the Software is furnished to do so, subject to
12
# the following conditions:
13
#
14
# The above copyright notice and this permission notice shall be
15
# included in all copies or substantial portions of the Software.
16
#
17
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
#
25
26
__doc__ = \
27
"""
28
pyparsing module - Classes and methods to define and execute parsing grammars
29
=============================================================================
30
31
The pyparsing module is an alternative approach to creating and
32
executing simple grammars, vs. the traditional lex/yacc approach, or the
33
use of regular expressions. With pyparsing, you don't need to learn
34
a new syntax for defining grammars or matching expressions - the parsing
35
module provides a library of classes that you use to construct the
36
grammar directly in Python.
37
38
Here is a program to parse "Hello, World!" (or any greeting of the form
39
``"<salutation>, <addressee>!"``), built up using :class:`Word`,
40
:class:`Literal`, and :class:`And` elements
41
(the :class:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
42
and the strings are auto-converted to :class:`Literal` expressions)::
43
44
from pip._vendor.pyparsing import Word, alphas
45
46
# define grammar of a greeting
47
greet = Word(alphas) + "," + Word(alphas) + "!"
48
49
hello = "Hello, World!"
50
print (hello, "->", greet.parseString(hello))
51
52
The program outputs the following::
53
54
Hello, World! -> ['Hello', ',', 'World', '!']
55
56
The Python representation of the grammar is quite readable, owing to the
57
self-explanatory class names, and the use of '+', '|' and '^' operators.
58
59
The :class:`ParseResults` object returned from
60
:class:`ParserElement.parseString` can be
61
accessed as a nested list, a dictionary, or an object with named
62
attributes.
63
64
The pyparsing module handles some of the problems that are typically
65
vexing when writing text parsers:
66
67
- extra or missing whitespace (the above program will also handle
68
"Hello,World!", "Hello , World !", etc.)
69
- quoted strings
70
- embedded comments
71
72
73
Getting Started -
74
-----------------
75
Visit the classes :class:`ParserElement` and :class:`ParseResults` to
76
see the base classes that most other pyparsing
77
classes inherit from. Use the docstrings for examples of how to:
78
79
- construct literal match expressions from :class:`Literal` and
80
:class:`CaselessLiteral` classes
81
- construct character word-group expressions using the :class:`Word`
82
class
83
- see how to create repetitive expressions using :class:`ZeroOrMore`
84
and :class:`OneOrMore` classes
85
- use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
86
and :class:`'&'<Each>` operators to combine simple expressions into
87
more complex ones
88
- associate names with your parsed results using
89
:class:`ParserElement.setResultsName`
90
- access the parsed data, which is returned as a :class:`ParseResults`
91
object
92
- find some helpful expression short-cuts like :class:`delimitedList`
93
and :class:`oneOf`
94
- find more useful common expressions in the :class:`pyparsing_common`
95
namespace class
96
"""
97
98
# Release metadata for this copy of the module.
__version__ = "2.4.7"
__versionTime__ = "30 Mar 2020 00:43 UTC"
# The scraped copy carried the e-mail obfuscation placeholder
# "[email protected]" here; the author's contact address is restored.
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
101
102
import string
103
from weakref import ref as wkref
104
import copy
105
import sys
106
import warnings
107
import re
108
import sre_constants
109
import collections
110
import pprint
111
import traceback
112
import types
113
from datetime import datetime
114
from operator import itemgetter
115
import itertools
116
from functools import wraps
117
from contextlib import contextmanager
118
119
try:
120
# Python 3
121
from itertools import filterfalse
122
except ImportError:
123
from itertools import ifilterfalse as filterfalse
124
125
try:
126
from _thread import RLock
127
except ImportError:
128
from threading import RLock
129
130
try:
131
# Python 3
132
from collections.abc import Iterable
133
from collections.abc import MutableMapping, Mapping
134
except ImportError:
135
# Python 2.7
136
from collections import Iterable
137
from collections import MutableMapping, Mapping
138
139
try:
140
from collections import OrderedDict as _OrderedDict
141
except ImportError:
142
try:
143
from ordereddict import OrderedDict as _OrderedDict
144
except ImportError:
145
_OrderedDict = None
146
147
try:
148
from types import SimpleNamespace
149
except ImportError:
150
class SimpleNamespace: pass
151
152
# version compatibility configuration
__compat__ = SimpleNamespace()
__compat__.__doc__ = """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

     - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
       of results names when an And expression is nested within an Or or MatchFirst; set to
       True to enable bugfix released in pyparsing 2.3.0, or False to preserve
       pre-2.3.0 handling of named results
"""
__compat__.collect_all_And_tokens = True

# diagnostic configuration; every flag starts out disabled
__diag__ = SimpleNamespace()
__diag__.__doc__ = """
Diagnostic configuration (all default to False)
     - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
       name is defined on a MatchFirst or Or expression with one or more And subexpressions
       (only warns if __compat__.collect_all_And_tokens is False)
     - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
       name is defined on a containing expression with ungrouped subexpressions that also
       have results names
     - warn_name_set_on_empty_Forward - flag to enable warnings when a Forward is defined
       with a results name, but has no contents defined
     - warn_on_multiple_string_args_to_oneof - flag to enable warnings when oneOf is
       incorrectly called with multiple str arguments
     - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
       calls to ParserElement.setName()
"""
__diag__.warn_multiple_tokens_in_named_alternation = False
__diag__.warn_ungrouped_named_tokens_in_collection = False
__diag__.warn_name_set_on_empty_Forward = False
__diag__.warn_on_multiple_string_args_to_oneof = False
__diag__.enable_debug_on_named_expressions = False
# snapshot of the flag names defined so far (only "enable_*" and "warn_*" attributes)
__diag__._all_names = [nm for nm in vars(__diag__) if nm.startswith("enable_") or nm.startswith("warn_")]
189
190
def _enable_all_warnings():
    """Set each of the four ``warn_*`` diagnostic flags on ``__diag__`` to True.

    ``enable_debug_on_named_expressions`` is not touched by this helper.
    """
    for flag_name in ("warn_multiple_tokens_in_named_alternation",
                      "warn_ungrouped_named_tokens_in_collection",
                      "warn_name_set_on_empty_Forward",
                      "warn_on_multiple_string_args_to_oneof"):
        setattr(__diag__, flag_name, True)


# publish the helper as an attribute of the diagnostics namespace
__diag__.enable_all_warnings = _enable_all_warnings
196
197
198
__all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__',
199
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
200
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
201
'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
202
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
203
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
204
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
205
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
206
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
207
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
208
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
209
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
210
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
211
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
212
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
213
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
214
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
215
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass',
216
'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
217
'conditionAsParseAction', 're',
218
]
219
220
# Interpreter version as a (major, minor, micro) tuple, plus a Python 3 flag.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3

if not PY_3:
    # Python 2: rebind ``range`` to ``xrange`` and define a unicode-tolerant
    # replacement for str().
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Unicode-friendly stand-in for ``str(obj)`` (Python 2 branch only).

        ``unicode`` instances are returned as they are.  Any other object is
        passed to ``str(obj)``; if that raises ``UnicodeEncodeError`` the
        object is encoded with the default encoding using XML character
        references, and each reference is then rewritten as a ``\\u`` escape.
        """
        if isinstance(obj, unicode):
            return obj
        try:
            # when this succeeds the result is identical to plain str(obj)
            return str(obj)
        except UnicodeEncodeError:
            encoded = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            charref = Regex(r'&#\d+;')
            charref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return charref.transformString(encoded)

    # build list of single arg builtins, tolerant of Python version, that
    # can be used as parse actions
    import __builtin__
    singleArgBuiltins = []
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__, fname))
        except AttributeError:
            continue

else:
    # Python 3: alias the Python 2 spellings used in this module to str/chr.
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    unicode = str
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [
        sum, len, sorted, reversed, list, tuple, set, any, all, min, max,
    ]

# the type object of a generator expression
_generatorType = type((y for y in range(1)))
269
270
def _xml_escape(data):
    """Return *data* with ``&``, ``>``, ``<``, ``"`` and ``'`` replaced by XML entities."""
    # '&' has to be handled first, otherwise the ampersands introduced by
    # the later substitutions would themselves be escaped
    substitutions = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for symbol, entity in substitutions:
        data = data.replace(symbol, entity)
    return data
279
280
# Character-set constants used to build Word and similar expressions.
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = "\\"
# every printable character except the whitespace characters
printables = "".join(ch for ch in string.printable if ch not in string.whitespace)
286
287
288
def conditionAsParseAction(fn, message=None, fatal=False):
    """Wrap a condition function as a parse action that raises on a false result.

    Parameters:

     - fn - condition callable; it is passed through ``_trim_arity`` and then
       called as ``fn(s, l, t)``
     - message - text for the raised exception; when None,
       "failed user-defined condition" is used
     - fatal - when true a ParseFatalException is raised, otherwise a
       ParseException

    Returns the wrapping function, which returns None when the condition is
    truthy.
    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message
    if fatal:
        exc_type = ParseFatalException
    else:
        exc_type = ParseException
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if bool(fn(s, l, t)):
            return
        raise exc_type(s, l, msg)

    return pa
299
300
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions

    Instances store ``loc``, ``msg``, ``pstr`` and ``parserElement``.  The
    attributes ``lineno``, ``col`` / ``column`` and ``line`` are computed on
    access from ``loc`` and ``pstr``.
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        self.loc = loc
        if msg is None:
            # single-argument form: the only argument is the message
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """supported attributes by name are:
         - lineno - returns the line number of the exception text
         - col (or column) - returns the column number of the exception text
         - line - returns the line containing the exception text

        Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        elif aname in ("col", "column"):
            return col(self.loc, self.pstr)
        elif aname == "line":
            return line(self.loc, self.pstr)
        else:
            raise AttributeError(aname)

    def __str__(self):
        """Format the message, the character found at ``loc`` (when an input
        string was given), and the char/line/column position."""
        if self.pstr:
            if self.loc >= len(self.pstr):
                foundstr = ', found end of text'
            else:
                foundstr = (', found %r' % self.pstr[self.loc:self.loc + 1]).replace(r'\\', '\\')
        else:
            foundstr = ''
        return ("%s%s (at char %d), (line:%d, col:%d)" %
                (self.msg, foundstr, self.loc, self.lineno, self.column))

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join((line_str[:line_column],
                                markerString, line_str[line_column:]))
        return line_str.strip()

    def __dir__(self):
        # advertise every computed attribute served by __getattr__,
        # including the "column" alias of "col"
        return "lineno col column line".split() + dir(type(self))
362
363
class ParseException(ParseBaseException):
    """
    Exception thrown when parse expressions don't match class;
    supported attributes by name are:
    - lineno - returns the line number of the exception text
    - col - returns the column number of the exception text
    - line - returns the line containing the exception text

    Example::

        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::

       Expected integer (at char 0), (line:1, col:1)
        column: 1

    """

    @staticmethod
    def explain(exc, depth=16):
        """
        Translate the Python traceback of an exception into a list of the
        pyparsing expressions (and other callers) that led to it.

        Parameters:

         - exc - exception raised during parsing (need not be a ParseException, in support
           of Python exceptions that might be raised in a parse action)
         - depth (default=16) - number of levels back in the stack trace to list expression
           and function names; if None, the full stack trace names will be listed; if 0, only
           the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.

        Note: the diagnostic output will include string representations of the expressions
        that failed to parse. These representations will be more helpful if you use `setName` to
        give identifiable names to your expressions. Otherwise they will use the default string
        forms, which may be cryptic to read.

        explain() is only supported under Python 3.
        """
        import inspect

        if depth is None:
            depth = sys.getrecursionlimit()

        report = []
        if isinstance(exc, ParseBaseException):
            # the offending input line, with a caret under the failing column
            report.append(exc.line)
            report.append(' ' * (exc.col - 1) + '^')
        report.append("{0}: {1}".format(type(exc).__name__, exc))

        if depth > 0:
            frames = inspect.getinnerframes(exc.__traceback__, context=depth)
            reported = set()
            for frame_record in frames[-depth:]:
                frame = frame_record[0]
                owner = frame.f_locals.get('self', None)

                if isinstance(owner, ParserElement):
                    # list each parser element once, and only from its
                    # parseImpl / _parseNoCache frames
                    if (frame.f_code.co_name not in ('parseImpl', '_parseNoCache')
                            or owner in reported):
                        continue
                    reported.add(owner)
                    owner_type = type(owner)
                    report.append("{0}.{1} - {2}".format(owner_type.__module__,
                                                         owner_type.__name__,
                                                         owner))
                elif owner is not None:
                    owner_type = type(owner)
                    report.append("{0}.{1}".format(owner_type.__module__,
                                                   owner_type.__name__))
                else:
                    func_name = frame.f_code.co_name
                    if func_name in ('wrapper', '<module>'):
                        continue
                    report.append("{0}".format(func_name))

                # only frames that added a line count against the depth limit
                depth -= 1
                if not depth:
                    break

        return '\n'.join(report)
454
455
456
class ParseFatalException(ParseBaseException):
    """User-throwable exception raised when inconsistent parse content
    is found; stops all parsing immediately.

    Adds no behaviour of its own to :class:`ParseBaseException`.
    """
460
461
class ParseSyntaxException(ParseFatalException):
    """Same as :class:`ParseFatalException`, but thrown internally
    when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates
    that parsing is to stop immediately because an unbacktrackable
    syntax error has been found.

    Adds no behaviour of its own to :class:`ParseFatalException`.
    """
468
469
#~ class ReparseException(ParseBaseException):
470
#~ """Experimental class - parse actions can raise this exception to cause
471
#~ pyparsing to reparse the input string:
472
#~ - with a modified input string, and/or
473
#~ - with a modified start location
474
#~ Set the values of the ReparseException in the constructor, and raise the
475
#~ exception in a parse action to cause pyparsing to use the new string/location.
476
#~ Setting the values as None causes no change to be made.
477
#~ """
478
#~ def __init_( self, newstring, restartLoc ):
479
#~ self.newParseText = newstring
480
#~ self.reparseLoc = restartLoc
481
482
class RecursiveGrammarException(Exception):
    """exception thrown by :class:`ParserElement.validate` if the
    grammar could be improperly recursive

    The trace passed to the constructor is kept in ``parseElementTrace``.
    """
    def __init__(self, parseElementList):
        self.parseElementTrace = parseElementList

    def __str__(self):
        # wrap the trace in a 1-tuple so that a tuple-valued trace is
        # formatted as one value instead of being unpacked by the % operator
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
491
492
class _ParseResultsWithOffset(object):
    """Holds a pair of values in ``tup``; index 0 is the first constructor
    argument and index 1 the second, which ``setOffset`` can replace."""

    def __init__(self, p1, p2):
        self.tup = p1, p2

    def __getitem__(self, i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        # only the first member of the pair is shown
        first = self.tup[0]
        return repr(first)

    def setOffset(self, i):
        first = self.tup[0]
        self.tup = (first, i)
501
502
class ParseResults(object):
503
"""Structured parse results, to provide multiple means of access to
504
the parsed data:
505
506
- as a list (``len(results)``)
507
- by list index (``results[0], results[1]``, etc.)
508
- by attribute (``results.<resultsName>`` - see :class:`ParserElement.setResultsName`)
509
510
Example::
511
512
integer = Word(nums)
513
date_str = (integer.setResultsName("year") + '/'
514
+ integer.setResultsName("month") + '/'
515
+ integer.setResultsName("day"))
516
# equivalent form:
517
# date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
518
519
# parseString returns a ParseResults object
520
result = date_str.parseString("1999/12/31")
521
522
def test(s, fn=repr):
523
print("%s -> %s" % (s, fn(eval(s))))
524
test("list(result)")
525
test("result[0]")
526
test("result['month']")
527
test("result.day")
528
test("'month' in result")
529
test("'minutes' in result")
530
test("result.dump()", str)
531
532
prints::
533
534
list(result) -> ['1999', '/', '12', '/', '31']
535
result[0] -> '1999'
536
result['month'] -> '12'
537
result.day -> '31'
538
'month' in result -> True
539
'minutes' in result -> False
540
result.dump() -> ['1999', '/', '12', '/', '31']
541
- day: 31
542
- month: 12
543
- year: 1999
544
"""
545
def __new__(cls, toklist=None, name=None, asList=True, modal=True):
546
if isinstance(toklist, cls):
547
return toklist
548
retobj = object.__new__(cls)
549
retobj.__doinit = True
550
return retobj
551
552
# Performance tuning: we construct a *lot* of these, so keep this
553
# constructor as small and fast as possible
554
def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
    """Initialize the token list and, when *name* is truthy, store the
    tokens under that results name.

    The state set-up is skipped when __new__ handed back an existing
    instance (its ``__doinit`` flag is already False).
    """
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            self.__toklist = [toklist]
        self.__tokdict = {}

    if not name:
        # no results name given (None or any other falsy value)
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name, int):
        name = _ustr(name)  # will always return a str, but use _ustr for consistency
    self.__name = name

    # nothing is stored under the name for None, '' or []
    if isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', []):
        return

    if isinstance(toklist, basestring):
        toklist = [toklist]

    if not asList:
        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            self[name] = toklist
        return

    if isinstance(toklist, ParseResults):
        named = ParseResults(toklist.__toklist)
    else:
        named = ParseResults(toklist[0])
    self[name] = _ParseResultsWithOffset(named, 0)
    self[name].__name = name
592
593
def __getitem__(self, i):
594
if isinstance(i, (int, slice)):
595
return self.__toklist[i]
596
else:
597
if i not in self.__accumNames:
598
return self.__tokdict[i][-1][0]
599
else:
600
return ParseResults([v[0] for v in self.__tokdict[i]])
601
602
def __setitem__(self, k, v, isinstance=isinstance):
    """Store *v* by position (int/slice key) or append it to the values
    kept under the results name *k*.

    A value that is already a _ParseResultsWithOffset is always stored under
    the name *k*; any other value stored by name is wrapped with offset 0.
    A ParseResults value gets a weak reference to this object as its parent.
    """
    prewrapped = isinstance(v, _ParseResultsWithOffset)
    if prewrapped or not isinstance(k, (int, slice)):
        entry = v if prewrapped else _ParseResultsWithOffset(v, 0)
        self.__tokdict[k] = self.__tokdict.get(k, []) + [entry]
        sub = entry[0]
    else:
        self.__toklist[k] = v
        sub = v
    if isinstance(sub, ParseResults):
        sub.__parent = wkref(self)
614
615
def __delitem__(self, i):
616
if isinstance(i, (int, slice)):
617
mylen = len(self.__toklist)
618
del self.__toklist[i]
619
620
# convert int to slice
621
if isinstance(i, int):
622
if i < 0:
623
i += mylen
624
i = slice(i, i + 1)
625
# get removed indices
626
removed = list(range(*i.indices(mylen)))
627
removed.reverse()
628
# fixup indices in token dictionary
629
for name, occurrences in self.__tokdict.items():
630
for j in removed:
631
for k, (value, position) in enumerate(occurrences):
632
occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
633
else:
634
del self.__tokdict[i]
635
636
def __contains__(self, k):
637
return k in self.__tokdict
638
639
def __len__(self):
640
return len(self.__toklist)
641
642
def __bool__(self):
643
return (not not self.__toklist)
644
__nonzero__ = __bool__
645
646
def __iter__(self):
647
return iter(self.__toklist)
648
649
def __reversed__(self):
650
return iter(self.__toklist[::-1])
651
652
def _iterkeys(self):
653
if hasattr(self.__tokdict, "iterkeys"):
654
return self.__tokdict.iterkeys()
655
else:
656
return iter(self.__tokdict)
657
658
def _itervalues(self):
659
return (self[k] for k in self._iterkeys())
660
661
def _iteritems(self):
662
return ((k, self[k]) for k in self._iterkeys())
663
664
if not PY_3:
    # Python 2.x only: iterator-returning accessors under their iter* names
    iterkeys = _iterkeys      # iterator of all named result keys
    itervalues = _itervalues  # iterator of all named result values
    iteritems = _iteritems    # iterator of all named result key-value tuples

    def keys(self):
        """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.iterkeys())

    def values(self):
        """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.itervalues())

    def items(self):
        """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.iteritems())

else:
    # Python 3: the dict-style accessors are the iterator helpers themselves
    keys = _iterkeys      # iterator of all named result keys
    values = _itervalues  # iterator of all named result values
    items = _iteritems    # iterator of all named result key-value tuples
695
696
def haskeys(self):
697
"""Since keys() returns an iterator, this method is helpful in bypassing
698
code that looks for the existence of any defined results names."""
699
return bool(self.__tokdict)
700
701
def pop(self, *args, **kwargs):
702
"""
703
Removes and returns item at specified index (default= ``last``).
704
Supports both ``list`` and ``dict`` semantics for ``pop()``. If
705
passed no argument or an integer argument, it will use ``list``
706
semantics and pop tokens from the list of parsed tokens. If passed
707
a non-integer argument (most likely a string), it will use ``dict``
708
semantics and pop the corresponding value from any defined results
709
names. A second default return value argument is supported, just as in
710
``dict.pop()``.
711
712
Example::
713
714
def remove_first(tokens):
715
tokens.pop(0)
716
print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
717
print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
718
719
label = Word(alphas)
720
patt = label("LABEL") + OneOrMore(Word(nums))
721
print(patt.parseString("AAB 123 321").dump())
722
723
# Use pop() in a parse action to remove named result (note that corresponding value is not
724
# removed from list form of results)
725
def remove_LABEL(tokens):
726
tokens.pop("LABEL")
727
return tokens
728
patt.addParseAction(remove_LABEL)
729
print(patt.parseString("AAB 123 321").dump())
730
731
prints::
732
733
['AAB', '123', '321']
734
- LABEL: AAB
735
736
['AAB', '123', '321']
737
"""
738
if not args:
739
args = [-1]
740
for k, v in kwargs.items():
741
if k == 'default':
742
args = (args[0], v)
743
else:
744
raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
745
if (isinstance(args[0], int)
746
or len(args) == 1
747
or args[0] in self):
748
index = args[0]
749
ret = self[index]
750
del self[index]
751
return ret
752
else:
753
defaultvalue = args[1]
754
return defaultvalue
755
756
def get(self, key, defaultValue=None):
757
"""
758
Returns named result matching the given key, or if there is no
759
such name, then returns the given ``defaultValue`` or ``None`` if no
760
``defaultValue`` is specified.
761
762
Similar to ``dict.get()``.
763
764
Example::
765
766
integer = Word(nums)
767
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
768
769
result = date_str.parseString("1999/12/31")
770
print(result.get("year")) # -> '1999'
771
print(result.get("hour", "not specified")) # -> 'not specified'
772
print(result.get("hour")) # -> None
773
"""
774
if key in self:
775
return self[key]
776
else:
777
return defaultValue
778
779
def insert(self, index, insStr):
780
"""
781
Inserts new element at location index in the list of parsed tokens.
782
783
Similar to ``list.insert()``.
784
785
Example::
786
787
print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
788
789
# use a parse action to insert the parse location in the front of the parsed results
790
def insert_locn(locn, tokens):
791
tokens.insert(0, locn)
792
print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
793
"""
794
self.__toklist.insert(index, insStr)
795
# fixup indices in token dictionary
796
for name, occurrences in self.__tokdict.items():
797
for k, (value, position) in enumerate(occurrences):
798
occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
799
800
def append(self, item):
801
"""
802
Add single element to end of ParseResults list of elements.
803
804
Example::
805
806
print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
807
808
# use a parse action to compute the sum of the parsed integers, and add it to the end
809
def append_sum(tokens):
810
tokens.append(sum(map(int, tokens)))
811
print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
812
"""
813
self.__toklist.append(item)
814
815
def extend(self, itemseq):
816
"""
817
Add sequence of elements to end of ParseResults list of elements.
818
819
Example::
820
821
patt = OneOrMore(Word(alphas))
822
823
# use a parse action to append the reverse of the matched strings, to make a palindrome
824
def make_palindrome(tokens):
825
tokens.extend(reversed([t[::-1] for t in tokens]))
826
return ''.join(tokens)
827
print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
828
"""
829
if isinstance(itemseq, ParseResults):
830
self.__iadd__(itemseq)
831
else:
832
self.__toklist.extend(itemseq)
833
834
def clear(self):
835
"""
836
Clear all elements and results names.
837
"""
838
del self.__toklist[:]
839
self.__tokdict.clear()
840
841
def __getattr__(self, name):
842
try:
843
return self[name]
844
except KeyError:
845
return ""
846
847
def __add__(self, other):
848
ret = self.copy()
849
ret += other
850
return ret
851
852
def __iadd__(self, other):
853
if other.__tokdict:
854
offset = len(self.__toklist)
855
addoffset = lambda a: offset if a < 0 else a + offset
856
otheritems = other.__tokdict.items()
857
otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
858
for k, vlist in otheritems for v in vlist]
859
for k, v in otherdictitems:
860
self[k] = v
861
if isinstance(v[0], ParseResults):
862
v[0].__parent = wkref(self)
863
864
self.__toklist += other.__toklist
865
self.__accumNames.update(other.__accumNames)
866
return self
867
868
def __radd__(self, other):
869
if isinstance(other, int) and other == 0:
870
# useful for merging many ParseResults using sum() builtin
871
return self.copy()
872
else:
873
# this may raise a TypeError - so be it
874
return other + self
875
876
def __repr__(self):
877
return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict))
878
879
def __str__(self):
880
return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
881
882
def _asStringList(self, sep=''):
    """Return the tokens as a flat list of strings.

    Nested ParseResults are flattened (without passing *sep* on to them);
    a non-empty *sep* is inserted before every token except the first.
    """
    pieces = []
    for tok in self.__toklist:
        if pieces and sep:
            pieces.append(sep)
        if isinstance(tok, ParseResults):
            pieces.extend(tok._asStringList())
        else:
            pieces.append(_ustr(tok))
    return pieces
892
893
def asList(self):
894
"""
895
Returns the parse results as a nested list of matching tokens, all converted to strings.
896
897
Example::
898
899
patt = OneOrMore(Word(alphas))
900
result = patt.parseString("sldkj lsdkj sldkj")
901
# even though the result prints in string-like form, it is actually a pyparsing ParseResults
902
print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
903
904
# Use asList() to create an actual list
905
result_list = result.asList()
906
print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
907
"""
908
return [res.asList() if isinstance(res, ParseResults) else res for res in self.__toklist]
909
910
def asDict(self):
    """
    Returns the named parse results as a nested dictionary.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

        result_dict = result.asDict()
        print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

        # even though a ParseResults supports dict-like access, sometime you just need to have a dict
        import json
        print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
        print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
    """
    # pick the item iterator appropriate for the running Python major version
    item_fn = self.items if PY_3 else self.iteritems

    def toItem(obj):
        # plain values pass through; results with names become dicts,
        # results without names become lists of converted items
        if not isinstance(obj, ParseResults):
            return obj
        if obj.haskeys():
            return obj.asDict()
        return [toItem(v) for v in obj]

    converted = {}
    for key, value in item_fn():
        converted[key] = toItem(value)
    return converted
945
946
def copy(self):
    """
    Returns a new copy of a :class:`ParseResults` object.

    The new object is built from the same token list, gets its own
    top-level names dictionary, and shares the parent reference and name.
    """
    duplicate = ParseResults(self.__toklist)
    duplicate.__tokdict = dict(self.__tokdict.items())
    duplicate.__parent = self.__parent
    duplicate.__accumNames.update(self.__accumNames)
    duplicate.__name = self.__name
    return duplicate
956
957
def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """
    (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

    ``doctag`` overrides the outermost tag name, ``namedItemsOnly``
    suppresses tokens that have no results name, ``indent`` is the prefix
    for this nesting level, and ``formatted=False`` removes all newlines
    and indentation.
    """
    # map each token position to the results name registered for it
    namedItems = {}
    for name, entries in self.__tokdict.items():
        for entry in entries:
            namedItems[entry[1]] = name

    if formatted:
        nl = "\n"
        # NOTE(review): the indent step literal is reproduced exactly as it
        # appears in this copy of the file - confirm its width was not
        # collapsed by whitespace normalization.
        nextLevelIndent = indent + " "
    else:
        # collapse out indents if formatting is not desired
        nl = ""
        indent = ""
        nextLevelIndent = ""

    if doctag is not None:
        selfTag = doctag
    elif self.__name:
        selfTag = self.__name
    else:
        selfTag = None

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    out = [nl, indent, "<", selfTag, ">"]

    childNamedOnly = namedItemsOnly and doctag is None
    for i, res in enumerate(self.__toklist):
        resTag = namedItems.get(i)
        if isinstance(res, ParseResults):
            # nested results render themselves, tagged with their name if any
            out.append(res.asXML(resTag, childNamedOnly, nextLevelIndent, formatted))
            continue

        # individual token, see if there is a name for it
        if not resTag:
            if namedItemsOnly:
                continue
            resTag = "ITEM"
        xmlBodyText = _xml_escape(_ustr(res))
        out.extend([nl, nextLevelIndent, "<", resTag, ">",
                    xmlBodyText,
                    "</", resTag, ">"])

    out.extend([nl, indent, "</", selfTag, ">"])
    return "".join(out)
1017
1018
def __lookup(self, sub):
1019
for k, vlist in self.__tokdict.items():
1020
for v, loc in vlist:
1021
if sub is v:
1022
return k
1023
return None
1024
1025
def getName(self):
    r"""
    Returns the results name for this token expression. Useful when several
    different expressions might match at a particular location.

    Resolution order: this object's own name; otherwise the name under
    which the (still alive) parent holds this object; otherwise, for a
    single-token result with exactly one named entry located at position
    0 or -1, that entry's name.  Returns ``None`` if none of these apply.

    Example::

        integer = Word(nums)
        ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        house_number_expr = Suppress('#') + Word(nums, alphanums)
        user_data = (Group(house_number_expr)("house_number")
                    | Group(ssn_expr)("ssn")
                    | Group(integer)("age"))
        user_info = OneOrMore(user_data)

        result = user_info.parseString("22 111-22-3333 #221B")
        for item in result:
            print(item.getName(), ':', item[0])

    prints::

        age : 22
        ssn : 111-22-3333
        house_number : 221B
    """
    if self.__name:
        return self.__name

    if self.__parent:
        # the parent is held through a weak reference and may be gone
        par = self.__parent()
        return par.__lookup(self) if par else None

    if len(self) != 1 or len(self.__tokdict) != 1:
        return None

    only_entries = next(iter(self.__tokdict.values()))
    if only_entries[0][1] in (0, -1):
        return next(iter(self.__tokdict.keys()))
    return None
1064
1065
def dump(self, indent='', full=True, include_list=True, _depth=0):
    """
    Diagnostic method for listing out the contents of
    a :class:`ParseResults`. Accepts an optional ``indent`` argument so
    that this string can be embedded in a nested display of other data.

    With ``full`` set, named results are listed (sorted by name) below
    the token list; when there are no names but some tokens are nested
    results, the tokens are listed by index instead.  ``_depth`` is the
    nesting level used by the recursive calls.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())

    prints::

        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    # NOTE(review): the per-level padding literals below are reproduced
    # exactly as they appear in this copy of the file - confirm their width
    # was not collapsed by whitespace normalization.
    NL = '\n'
    out = [indent + _ustr(self.asList()) if include_list else '']

    if not full:
        return "".join(out)

    if self.haskeys():
        items = sorted((str(k), v) for k, v in self.items())
        for k, v in items:
            if out:
                out.append(NL)
            out.append("%s%s- %s: " % (indent, (' ' * _depth), k))
            if not isinstance(v, ParseResults):
                out.append(repr(v))
            elif v:
                out.append(v.dump(indent=indent, full=full, include_list=include_list, _depth=_depth + 1))
            else:
                out.append(_ustr(v))
    elif any(isinstance(vv, ParseResults) for vv in self):
        for i, vv in enumerate(self):
            if isinstance(vv, ParseResults):
                body = vv.dump(indent=indent,
                               full=full,
                               include_list=include_list,
                               _depth=_depth + 1)
            else:
                body = _ustr(vv)
            out.append("\n%s%s[%d]:\n%s%s%s" % (indent,
                                                (' ' * (_depth)),
                                                i,
                                                indent,
                                                (' ' * (_depth + 1)),
                                                body))

    return "".join(out)
1129
1130
def pprint(self, *args, **kwargs):
    """
    Pretty-printer for parsed results as a list, using the
    `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
    Accepts additional positional or keyword args as defined for
    `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

    Example::

        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)

    prints::

        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    # pretty-print the plain nested-list form rather than the results object
    as_nested_list = self.asList()
    pprint.pprint(as_nested_list, *args, **kwargs)
1156
1157
# add support for pickle protocol
1158
def __getstate__(self):
1159
return (self.__toklist,
1160
(self.__tokdict.copy(),
1161
self.__parent is not None and self.__parent() or None,
1162
self.__accumNames,
1163
self.__name))
1164
1165
def __setstate__(self, state):
1166
self.__toklist = state[0]
1167
self.__tokdict, par, inAccumNames, self.__name = state[1]
1168
self.__accumNames = {}
1169
self.__accumNames.update(inAccumNames)
1170
if par is not None:
1171
self.__parent = wkref(par)
1172
else:
1173
self.__parent = None
1174
1175
def __getnewargs__(self):
1176
return self.__toklist, self.__name, self.__asList, self.__modal
1177
1178
def __dir__(self):
1179
return dir(type(self)) + list(self.keys())
1180
1181
@classmethod
def from_dict(cls, other, name=None):
    """
    Helper classmethod to construct a ParseResults from a dict, preserving the
    name-value relations as results names. If an optional 'name' argument is
    given, a nested ParseResults will be returned
    """
    def is_iterable(obj):
        # anything iter() accepts counts, except string types
        try:
            iter(obj)
        except Exception:
            return False
        text_types = (str, bytes) if PY_3 else basestring
        return not isinstance(obj, text_types)

    result = cls([])
    for key, value in other.items():
        if isinstance(value, Mapping):
            # nested mappings become nested, named results
            piece = cls.from_dict(value, name=key)
        else:
            piece = cls([value], name=key, asList=is_iterable(value))
        result += piece

    if name is not None:
        result = cls([result], name=name)
    return result
1208
1209
# register ParseResults as a virtual subclass of MutableMapping
MutableMapping.register(ParseResults)
1210
1211
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See
    :class:`ParserElement.parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # a location directly after a newline (and inside the string) is column 1
    if 0 < loc < len(strg) and strg[loc - 1] == '\n':
        return 1
    # otherwise measure the distance back to the preceding newline (-1 if none)
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
1224
1225
def lineno(loc, strg):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`ParserElement.parseString`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
1236
1237
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    # first character after the preceding newline (index 0 when there is none)
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no following newline: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
1246
1247
def _defaultStartDebugAction(instring, loc, expr):
    """Default debug action: print the expression about to be tried and its location."""
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    message += "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    print(message)
1249
1250
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Default debug action: print the matched expression and its tokens as a list."""
    matched_tokens = str(toks.asList())
    print("Matched " + _ustr(expr) + " -> " + matched_tokens)
1252
1253
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Default debug action: print the exception raised while matching."""
    message = "Exception raised:" + _ustr(exc)
    print(message)
1255
1256
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
1259
1260
# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
1261
#~ 'decorator to trim function calls to match the arity of the target'
1262
#~ def _trim_arity(func, maxargs=3):
1263
#~ if func in singleArgBuiltins:
1264
#~ return lambda s,l,t: func(t)
1265
#~ limit = 0
1266
#~ foundArity = False
1267
#~ def wrapper(*args):
1268
#~ nonlocal limit,foundArity
1269
#~ while 1:
1270
#~ try:
1271
#~ ret = func(*args[limit:])
1272
#~ foundArity = True
1273
#~ return ret
1274
#~ except TypeError:
1275
#~ if limit == maxargs or foundArity:
1276
#~ raise
1277
#~ limit += 1
1278
#~ continue
1279
#~ return wrapper
1280
1281
# this version is Python 2.x-3.x cross-compatible
1282
'decorator to trim function calls to match the arity of the target'
1283
def _trim_arity(func, maxargs=2):
    """
    Wrap ``func`` so it can be called with the full parse-action argument
    list even if it accepts fewer arguments.

    The returned wrapper calls ``func`` with progressively fewer leading
    arguments (``args[limit:]``) each time the call itself raises a
    ``TypeError``, until a call succeeds; after the first success the
    discovered offset is reused and later ``TypeError`` s propagate.
    Functions listed in ``singleArgBuiltins`` are simply called with the
    last argument only.
    """
    if func in singleArgBuiltins:
        return lambda s, l, t: func(t)
    # one-element lists serve as mutable cells shared with the nested wrapper
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3, 5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3, 5, 0) else -2
            frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # only a TypeError whose innermost frame is the call line above
                        # is treated as an arity mismatch; anything else is re-raised
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local traceback reference
                        try:
                            del tb
                        except NameError:
                            pass

                # retry with one fewer leading argument, up to the allowed maximum
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1349
1350
1351
class ParserElement(object):
1352
"""Abstract base level parser element class."""
1353
DEFAULT_WHITE_CHARS = " \n\t\r"
1354
verbose_stacktrace = False
1355
1356
@staticmethod
1357
def setDefaultWhitespaceChars(chars):
1358
r"""
1359
Overrides the default whitespace chars
1360
1361
Example::
1362
1363
# default whitespace chars are space, <TAB> and newline
1364
OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
1365
1366
# change to just treat newline as significant
1367
ParserElement.setDefaultWhitespaceChars(" \t")
1368
OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
1369
"""
1370
ParserElement.DEFAULT_WHITE_CHARS = chars
1371
1372
@staticmethod
1373
def inlineLiteralsUsing(cls):
1374
"""
1375
Set class to be used for inclusion of string literals into a parser.
1376
1377
Example::
1378
1379
# default literal class used is Literal
1380
integer = Word(nums)
1381
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1382
1383
date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
1384
1385
1386
# change to Suppress
1387
ParserElement.inlineLiteralsUsing(Suppress)
1388
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1389
1390
date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
1391
"""
1392
ParserElement._literalStringClass = cls
1393
1394
@classmethod
1395
def _trim_traceback(cls, tb):
1396
while tb.tb_next:
1397
tb = tb.tb_next
1398
return tb
1399
1400
def __init__(self, savelist=False):
1401
self.parseAction = list()
1402
self.failAction = None
1403
# ~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
1404
self.strRepr = None
1405
self.resultsName = None
1406
self.saveAsList = savelist
1407
self.skipWhitespace = True
1408
self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
1409
self.copyDefaultWhiteChars = True
1410
self.mayReturnEmpty = False # used when checking for left-recursion
1411
self.keepTabs = False
1412
self.ignoreExprs = list()
1413
self.debug = False
1414
self.streamlined = False
1415
self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
1416
self.errmsg = ""
1417
self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
1418
self.debugActions = (None, None, None) # custom debug actions
1419
self.re = None
1420
self.callPreparse = True # used to avoid redundant calls to preParse
1421
self.callDuringTry = False
1422
1423
def copy(self):
1424
"""
1425
Make a copy of this :class:`ParserElement`. Useful for defining
1426
different parse actions for the same parsing pattern, using copies of
1427
the original parse element.
1428
1429
Example::
1430
1431
integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1432
integerK = integer.copy().addParseAction(lambda toks: toks[0] * 1024) + Suppress("K")
1433
integerM = integer.copy().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
1434
1435
print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1436
1437
prints::
1438
1439
[5120, 100, 655360, 268435456]
1440
1441
Equivalent form of ``expr.copy()`` is just ``expr()``::
1442
1443
integerM = integer().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
1444
"""
1445
cpy = copy.copy(self)
1446
cpy.parseAction = self.parseAction[:]
1447
cpy.ignoreExprs = self.ignoreExprs[:]
1448
if self.copyDefaultWhiteChars:
1449
cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1450
return cpy
1451
1452
def setName(self, name):
    """
    Define name for this expression, makes debugging and exception messages clearer.
    Also enables debugging on this expression when the
    ``__diag__.enable_debug_on_named_expressions`` flag is set.  Returns ``self``.

    Example::

        Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    # error messages report the expression by its new name
    self.errmsg = "Expected " + self.name
    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        self.setDebug()
    return self
1466
1467
def setResultsName(self, name, listAllMatches=False):
    """
    Define name for referencing matching tokens as a nested attribute
    of the returned parse results.
    NOTE: this returns a *copy* of the original :class:`ParserElement` object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    You can also set results names using the abbreviated syntax,
    ``expr("name")`` in place of ``expr.setResultsName("name")``
    - see :class:`__call__`.

    Example::

        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # the actual work lives in the overridable _setResultsName hook
    named_copy = self._setResultsName(name, listAllMatches)
    return named_copy
1489
1490
def _setResultsName(self, name, listAllMatches=False):
1491
newself = self.copy()
1492
if name.endswith("*"):
1493
name = name[:-1]
1494
listAllMatches = True
1495
newself.resultsName = name
1496
newself.modalResults = not listAllMatches
1497
return newself
1498
1499
def setBreak(self, breakFlag=True):
    """Method to invoke the Python pdb debugger when this element is
    about to be parsed. Set ``breakFlag`` to True to enable, False to
    disable.

    Enabling wraps the current ``_parse`` in a function that enters pdb
    first; disabling restores the wrapped method if one is present.
    Returns ``self``.
    """
    if not breakFlag:
        current = self._parse
        if hasattr(current, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    _parseMethod = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        # this call to pdb.set_trace() is intentional, not a checkin error
        pdb.set_trace()
        return _parseMethod(instring, loc, doActions, callPreParse)

    # remember the wrapped method so that setBreak(False) can undo this
    breaker._originalParseMethod = _parseMethod
    self._parse = breaker
    return self
1517
1518
def setParseAction(self, *fns, **kwargs):
    """
    Define one or more actions to perform when successfully matching parse element definition.
    Parse action fn is a callable method with 0-3 arguments, called as ``fn(s, loc, toks)`` ,
    ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - s = the original string being parsed (see note below)
    - loc = the location of the matching substring
    - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object

    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    If None is passed as the parse action, all previously added parse actions for this
    expression are cleared.

    Optional keyword arguments:
    - callDuringTry = (default= ``False``) indicate if parse action should be run during lookaheads and alternate testing

    Raises ``TypeError`` if any of ``fns`` is not callable.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Example::

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
    """
    if fns == (None,):
        # a lone None clears every previously registered action
        self.parseAction = []
    else:
        if any(not callable(fn) for fn in fns):
            raise TypeError("parse actions must be callable")
        self.parseAction = [_trim_arity(fn) for fn in fns]
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1566
1567
def addParseAction(self, *fns, **kwargs):
    """
    Add one or more parse actions to expression's list of parse actions. See :class:`setParseAction`.

    A true ``callDuringTry`` keyword turns the flag on; it is never turned off here.

    See examples in :class:`copy`.
    """
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction += wrapped
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1576
1577
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`setParseAction` for function call signatures. Unlike ``setParseAction``,
    functions passed to ``addCondition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:
    - message = define a custom message to be used in the raised exception
    - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException

    Example::

        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    message = kwargs.get('message')
    fatal = kwargs.get('fatal', False)
    for predicate in fns:
        checker = conditionAsParseAction(predicate, message=message, fatal=fatal)
        self.parseAction.append(checker)

    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1601
1602
def setFailAction(self, fn):
    """Define action to perform if parsing fails at this expression.
    Fail action fn is a callable function that takes the arguments
    ``fn(s, loc, expr, err)`` where:
    - s = string being parsed
    - loc = location where expression match was attempted and failed
    - expr = the parse expression that failed
    - err = the exception thrown
    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Returns ``self``."""
    setattr(self, "failAction", fn)
    return self
1614
1615
def _skipIgnorables(self, instring, loc):
1616
exprsFound = True
1617
while exprsFound:
1618
exprsFound = False
1619
for e in self.ignoreExprs:
1620
try:
1621
while 1:
1622
loc, dummy = e._parse(instring, loc)
1623
exprsFound = True
1624
except ParseException:
1625
pass
1626
return loc
1627
1628
def preParse(self, instring, loc):
    """
    Return the location at which matching should start: ignorable
    expressions are skipped first (if any are defined), then characters
    found in ``whiteChars`` (if ``skipWhitespace`` is set).
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
1639
1640
def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: match nothing, returning ``loc`` and an empty token list."""
    no_tokens = []
    return loc, no_tokens
1642
1643
def postParse(self, instring, loc, tokenlist):
    """Base implementation: return the token list unchanged."""
    unchanged = tokenlist
    return unchanged
1645
1646
# ~ @profile
1647
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Core (non-memoizing) parse step for this expression.

    Runs ``preParse`` (when both ``callPreParse`` and ``self.callPreparse``
    are set), ``parseImpl`` and ``postParse``, wraps the tokens in a
    :class:`ParseResults`, and then applies the parse actions when
    ``doActions`` or ``self.callDuringTry`` is set.

    Returns a ``(loc, retTokens)`` tuple.  An ``IndexError`` raised by
    ``parseImpl`` (in the guarded path) or by a parse action is converted
    to a ``ParseException``.
    """
    # indexes into self.debugActions
    TRY, MATCH, FAIL = 0, 1, 2
    debugging = (self.debug)  # and doActions)

    if debugging or self.failAction:
        # slow path: same matching logic as the else-branch below, but wrapped
        # so the debug/fail callbacks can observe a failure before it is re-raised
        # ~ print ("Match", self, "at loc", loc, "(%d, %d)" % (lineno(loc, instring), col(loc, instring)))
        if self.debugActions[TRY]:
            self.debugActions[TRY](instring, loc, self)
        try:
            if callPreParse and self.callPreparse:
                preloc = self.preParse(instring, loc)
            else:
                preloc = loc
            tokensStart = preloc
            if self.mayIndexError or preloc >= len(instring):
                try:
                    loc, tokens = self.parseImpl(instring, preloc, doActions)
                except IndexError:
                    raise ParseException(instring, len(instring), self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
        except Exception as err:
            # ~ print ("Exception raised:", err)
            if self.debugActions[FAIL]:
                self.debugActions[FAIL](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    else:
        # fast path: no debug or fail callbacks to notify
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokensStart, retTokens)
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        exc.__cause__ = parse_action_exc
                        raise exc

                    # a parse action that returns something other than the results
                    # object it was given replaces the tokens with that value
                    if tokens is not None and tokens is not retTokens:
                        retTokens = ParseResults(tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                                  modal=self.modalResults)
            except Exception as err:
                # ~ print "Exception raised in user parse action:", err
                if self.debugActions[FAIL]:
                    self.debugActions[FAIL](instring, tokensStart, self, err)
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokensStart, retTokens)
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    exc.__cause__ = parse_action_exc
                    raise exc

                # same replacement rule as in the debugging branch above
                if tokens is not None and tokens is not retTokens:
                    retTokens = ParseResults(tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                              modal=self.modalResults)
    if debugging:
        # ~ print ("Matched", self, "->", retTokens.asList())
        if self.debugActions[MATCH]:
            self.debugActions[MATCH](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1733
1734
def tryParse(self, instring, loc):
    """
    Attempt a match at ``loc`` without running parse actions and return
    the end location.  A ``ParseFatalException`` is converted to an
    ordinary ``ParseException`` located at ``loc``.
    """
    try:
        outcome = self._parse(instring, loc, doActions=False)
        return outcome[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
1739
1740
def canParseNext(self, instring, loc):
    """Return ``True`` if ``tryParse`` succeeds at ``loc``, ``False`` if it raises ``ParseException`` or ``IndexError``."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
1747
1748
class _UnboundedCache(object):
1749
def __init__(self):
1750
cache = {}
1751
self.not_in_cache = not_in_cache = object()
1752
1753
def get(self, key):
1754
return cache.get(key, not_in_cache)
1755
1756
def set(self, key, value):
1757
cache[key] = value
1758
1759
def clear(self):
1760
cache.clear()
1761
1762
def cache_len(self):
1763
return len(cache)
1764
1765
self.get = types.MethodType(get, self)
1766
self.set = types.MethodType(set, self)
1767
self.clear = types.MethodType(clear, self)
1768
self.__len__ = types.MethodType(cache_len, self)
1769
1770
if _OrderedDict is not None:
    class _FifoCache(object):
        """
        Packrat cache bounded to ``size`` entries; once the limit is
        exceeded the oldest inserted entries are discarded first.
        ``get`` returns ``not_in_cache`` for a missing key.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                # trim from the oldest end until we are back within the limit
                while len(cache) > size:
                    try:
                        cache.popitem(False)
                    except KeyError:
                        pass

            def clear(self):
                cache.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)

else:
    class _FifoCache(object):
        """
        Fallback packrat cache used when no ordered dict is available: a
        plain dict plus a deque recording insertion order.  Bounded to
        ``size`` entries, oldest discarded first.
        ``get`` returns ``not_in_cache`` for a missing key.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # The deque must NOT carry maxlen=size: a bounded deque silently
            # drops its oldest key on append, so the eviction loop below would
            # never run and the matching dict entry would never be removed.
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                # record insertion order once per key, so the deque and the
                # dict always hold the same set of keys
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                while len(key_fifo) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)
1827
1828
# argument cache for optimizing repeated calls when backtracking through recursive expressions
1829
packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail
1830
packrat_cache_lock = RLock()
1831
packrat_cache_stats = [0, 0]
1832
1833
# this method gets repeatedly called during backtracking with the same arguments -
1834
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
1835
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Memoizing front end for ``_parseNoCache``.

    Results are cached under ``(self, instring, loc, callPreParse,
    doActions)``.  Successful results are stored and returned as
    ``(loc, tokens-copy)``; a ``ParseBaseException`` is stored as a fresh
    instance built from its ``args`` and re-raised on later hits.  The
    hit/miss counters in ``packrat_cache_stats`` are updated accordingly.
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)

        if value is not cache.not_in_cache:
            # seen before: replay the stored outcome
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(value, Exception):
                raise value
            return value[0], value[1].copy()

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            value = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(lookup, pe.__class__(*pe.args))
            raise
        # store a copy so later changes to the returned tokens don't affect the cache
        cache.set(lookup, (value[0], value[1].copy()))
        return value
1857
1858
# default: parse without memoization; enablePackrat() rebinds this to _parseCache
_parse = _parseNoCache
1859
1860
@staticmethod
1861
def resetCache():
1862
ParserElement.packrat_cache.clear()
1863
ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
1864
1865
_packratEnabled = False
@staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code. Memoizing is done of
    both valid results and parsing exceptions.

    Parameters:

    - cache_size_limit - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs that use parse actions that
    have side-effects. For this reason, packrat parsing is disabled when
    you first import pyparsing. To activate the packrat feature, your
    program must call the class method :class:`ParserElement.enablePackrat`.
    For best results, call ``enablePackrat()`` immediately after
    importing pyparsing.

    Calling this again after packrat parsing has been enabled does nothing.

    Example::

        from pip._vendor import pyparsing
        pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        new_cache = ParserElement._UnboundedCache()
    else:
        new_cache = ParserElement._FifoCache(cache_size_limit)
    ParserElement.packrat_cache = new_cache
    # route all parsing through the memoizing entry point from now on
    ParserElement._parse = ParserElement._parseCache
1900
1901
def parseString(self, instring, parseAll=False):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    Returns the parsed data as a :class:`ParseResults` object, which may be
    accessed as a list, or as a dict or object with attributes if the given parser
    includes results names.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set ``parseAll`` to True (equivalent to ending
    the grammar with ``StringEnd()``).

    Note: ``parseString`` implicitly calls ``expandtabs()`` on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the ``loc`` argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:

    - calling ``parseWithTabs`` on your grammar before calling ``parseString``
      (see :class:`parseWithTabs`)
    - define your parse action using the full ``(s, loc, toks)`` signature, and
      reference the input string using the parse action's ``s`` argument
    - explicitly expand the tabs in your input string before calling
      ``parseString``

    Example::

        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        # ~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    text = instring if self.keepTabs else instring.expandtabs()
    try:
        end_loc, tokens = self._parse(text, 0)
        if parseAll:
            # skip trailing whitespace/ignorables, then require end of input
            end_loc = self.preParse(text, end_loc)
            (Empty() + StringEnd())._parse(text, end_loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc
    return tokens
1958
1959
def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``maxMatches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parseString` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    text_len = len(instring)
    skip_ignorables = self.preParse
    attempt = self._parse
    ParserElement.resetCache()
    pos = 0
    found = 0
    try:
        while pos <= text_len and found < maxMatches:
            try:
                start = skip_ignorables(instring, pos)
                end, tokens = attempt(instring, start, callPreParse=False)
            except ParseException:
                # no match here; retry one character past the pre-parsed position
                pos = start + 1
                continue

            if end <= pos:
                # match did not advance past the scan position; step forward instead
                pos = start + 1
                continue

            found += 1
            yield tokens, start, end
            if not overlap:
                pos = end
            elif skip_ignorables(instring, pos) > pos:
                pos = end
            else:
                pos += 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc
2032
2033
def transformString(self, instring):
    """
    Extension to :class:`scanString`, to modify matching text with modified tokens that may
    be returned from a parse action. To use ``transformString``, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking ``transformString()`` on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action. ``transformString()`` returns the resulting transformed string.

    Note that this method sets ``keepTabs`` to True on this expression and
    does not restore it afterwards.

    Example::

        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for tokens, start, end in self.scanString(instring):
            # untouched text between the previous match and this one
            pieces.append(instring[prev_end:start])
            if tokens:
                if isinstance(tokens, ParseResults):
                    pieces.extend(tokens.asList())
                elif isinstance(tokens, list):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)
            prev_end = end
        pieces.append(instring[prev_end:])
        pieces = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(pieces)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc
2080
2081
def searchString(self, instring, maxMatches=_MAX_INT):
    """
    Another extension to :class:`scanString`, simplifying the access to the tokens found
    to match the given parse expression. May be called with optional
    ``maxMatches`` argument, to clip searching after 'n' matches are found.

    Returns a :class:`ParseResults` holding the tokens of each match.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        found = [tokens for tokens, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc
2112
2113
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional ``maxsplit`` argument, to limit the number of splits;
    and the optional ``includeSeparators`` argument (default= ``False``), if the separating
    matching text should be included in the split results.

    Yields the text before each separator match (followed by the first
    token of the match when ``includeSeparators`` is true), and finally
    the text after the last match.

    Example::

        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    last = 0
    for tokens, start, end in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:start]
        if includeSeparators:
            yield tokens[0]
        last = end
    # whatever follows the final separator (possibly the empty string)
    yield instring[last:]
2137
2138
def __add__(self, other):
    """
    Implementation of + operator - returns :class:`And`. Adding strings to a ParserElement
    converts them to :class:`Literal`s by default.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.

        Literal('start') + ... + Literal('end')

    is equivalent to:

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.

    If ``other`` is neither a string nor a ParserElement, a SyntaxWarning
    is issued and None is returned.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None
2175
2176
def __radd__(self, other):
    """
    Implementation of + operator when left operand is not a :class:`ParserElement`

    ``... + expr`` produces ``SkipTo(expr)("_skipped*") + expr``. A string
    left operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a SyntaxWarning and yields None.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(lhs, ParserElement):
        return lhs + self

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None
2190
2191
def __sub__(self, other):
    """
    Implementation of - operator, returns :class:`And` with error stop

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a SyntaxWarning and yields None.
    """
    rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None
2202
2203
def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a :class:`ParserElement`

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a SyntaxWarning and yields None.
    """
    lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(lhs, ParserElement):
        return lhs - self

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None
2214
2215
def __mul__(self, other):
    """
    Implementation of * operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
    may also include ``None`` as in:
     - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
          to ``expr*n + ZeroOrMore(expr)``
          (read as "at least n instances of ``expr``")
     - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
          (read as "0 to n instances of ``expr``")
     - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
     - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    ``...`` (Ellipsis) may be used anywhere ``None`` is accepted.

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises:
     - TypeError if ``other`` is not an int or a tuple of ints/None/``...``
     - ValueError for a negative count, a max smaller than the min, or a
       multiplier of 0 / (0, 0)
    """
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0, ) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # normalize to exactly two entries, with None meaning "unbounded"/"unset"
        other = tuple(o if o is not Ellipsis else None for o in other)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            # interpolate the type names into the message (previously the
            # format arguments were passed as extra exception args)
            raise TypeError("cannot multiply 'ParserElement' and ('%s', '%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0, 0)")

    if optElements:
        def makeOptionalList(n):
            # builds Optional(self + Optional(self + ...)) nested n levels deep
            if n > 1:
                return Optional(self + makeOptionalList(n - 1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret
2288
2289
def __rmul__(self, other):
    """
    Implementation of * operator when the multiplier is the left operand;
    delegates to ``__mul__`` so ``3 * expr`` behaves like ``expr * 3``.
    """
    product = self.__mul__(other)
    return product
2291
2292
def __or__(self, other):
    """
    Implementation of | operator - returns :class:`MatchFirst`

    ``expr | ...`` returns a pending-skip placeholder with ``must_skip=True``.
    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a SyntaxWarning and yields None.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(rhs, ParserElement):
        return MatchFirst([self, rhs])

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None
2306
2307
def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a :class:`ParserElement`

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a SyntaxWarning and yields None.
    """
    lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(lhs, ParserElement):
        return lhs | self

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None
2318
2319
def __xor__(self, other):
    """
    Implementation of ^ operator - returns :class:`Or`

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a SyntaxWarning and yields None.
    """
    rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None
2330
2331
def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a :class:`ParserElement`

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a SyntaxWarning and yields None.
    """
    lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None
2342
2343
def __and__(self, other):
    """
    Implementation of & operator - returns :class:`Each`

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a SyntaxWarning and yields None.
    """
    rhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None
2354
2355
def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a :class:`ParserElement`

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand issues a SyntaxWarning and yields None.
    """
    lhs = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(lhs, ParserElement):
        return lhs & self

    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None
2366
2367
def __invert__(self):
    """
    Implementation of ~ operator - returns :class:`NotAny`
    wrapping this expression.
    """
    negated = NotAny(self)
    return negated
2372
2373
def __iter__(self):
    """
    Always raises TypeError: parser elements are not iterable.
    """
    # must implement __iter__ to override legacy use of sequential access to __getitem__ to
    # iterate over a sequence
    class_name = self.__class__.__name__
    raise TypeError('%r object is not iterable' % class_name)
2377
2378
def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:
     - ``expr[n]`` is equivalent to ``expr*n``
     - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
     - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
          to ``expr*n + ZeroOrMore(expr)``
          (read as "at least n instances of ``expr``")
     - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
          (read as "0 to n instances of ``expr``")
     - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
     - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
     ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    Keys with more than two entries trigger a warning and are clipped to
    their first two entries.
    """
    # convert single arg keys to tuples
    if isinstance(key, str):
        key = (key,)
    else:
        try:
            iter(key)
        except TypeError:
            # a scalar n means "exactly n", i.e. the range (n, n)
            key = (key, key)

    key_len = len(key)
    if key_len > 2:
        overflow = '... [{0}]'.format(key_len) if key_len > 5 else ''
        warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5], overflow))

    # clip to 2 elements
    return self * tuple(key[:2])
2413
2414
def __call__(self, name=None):
    """
    Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
    passed as ``True``.

    If ``name`` is omitted, same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums + "-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)
2433
2434
def suppress(self):
    """
    Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from
    cluttering up returned output.

    Returns a :class:`Suppress` wrapping this expression.
    """
    wrapped = Suppress(self)
    return wrapped
2440
2441
def leaveWhitespace(self):
    """
    Disables the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    Returns ``self`` so calls can be chained.
    """
    setattr(self, 'skipWhitespace', False)
    return self
2449
2450
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars

    Re-enables whitespace skipping, stores ``chars`` as this element's
    whitespace characters, and clears ``copyDefaultWhiteChars``.
    Returns ``self`` so calls can be chained.
    """
    self.skipWhitespace, self.whiteChars, self.copyDefaultWhiteChars = True, chars, False
    return self
2458
2459
def parseWithTabs(self):
    """
    Overrides the default behavior of expanding ``<TAB>``s to spaces before
    parsing the input string, so that tabs are kept as-is.
    Must be called before ``parseString`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    Returns ``self`` so calls can be chained.
    """
    setattr(self, 'keepTabs', True)
    return self
2467
2468
def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    Strings are wrapped in :class:`Suppress`; a :class:`Suppress` is added
    only if it is not already registered; any other expression is copied
    and wrapped in :class:`Suppress`. Returns ``self``.

    Example::

        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    ignorable = Suppress(other) if isinstance(other, basestring) else other

    if not isinstance(ignorable, Suppress):
        self.ignoreExprs.append(Suppress(ignorable.copy()))
    elif ignorable not in self.ignoreExprs:
        self.ignoreExprs.append(ignorable)
    return self
2491
2492
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching.

    Each falsy action is replaced by the corresponding default debug
    action. Sets ``debug`` to True and returns ``self``.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_failure = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_failure)
    self.debug = True
    return self
2501
2502
def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to True to enable, False to disable.

    Example::

        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`setDebugActions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`setName` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``.
    """
    if not flag:
        self.debug = False
        return self

    # installing the default actions also switches ``debug`` on
    self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self
2544
2545
def __str__(self):
    """Return this element's ``name`` attribute."""
    label = self.name
    return label
2547
2548
def __repr__(self):
    """Return the same text as the string conversion of this element (via ``_ustr``)."""
    text = _ustr(self)
    return text
2550
2551
def streamline(self):
    """
    Mark this element as streamlined and discard its cached string
    representation. Returns ``self``.
    """
    self.streamlined, self.strRepr = True, None
    return self
2555
2556
def checkRecursion(self, parseElementList):
    """
    Recursion-check hook; this base implementation does nothing and
    returns None.
    """
    return None
2558
2559
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    This implementation starts ``checkRecursion`` with an empty element
    list; ``validateTrace`` is not used here.
    """
    seen_elements = []
    self.checkRecursion(seen_elements)
2564
2565
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Returns whatever :class:`parseString` returns for the file's contents.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # not a file-like object - treat the argument as a path
        with open(file_or_filename, "r") as source:
            text = source.read()

    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc
2586
2587
def __eq__(self, other):
    """
    Equality test with three modes: identity is always equal; a string
    operand is compared by testing whether this expression ``matches`` it;
    another ParserElement is compared by instance attributes (``vars``).
    Anything else is unequal.
    """
    if self is other:
        return True
    if isinstance(other, basestring):
        return self.matches(other)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
2595
2596
def __ne__(self, other):
    """Logical negation of ``self == other``."""
    is_equal = (self == other)
    return not is_equal
2598
2599
def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    identity = id(self)
    return identity
2601
2602
def __req__(self, other):
    """Reflected equality helper; returns the result of ``self == other``."""
    outcome = (self == other)
    return outcome
2604
2605
def __rne__(self, other):
    """Reflected inequality helper; returns ``not (self == other)``."""
    is_equal = (self == other)
    return not is_equal
2607
2608
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests

    Returns True if parsing succeeds, False if it raises a
    ``ParseBaseException``.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2627
2628
def runTests(self, tests, parseAll=True, comment='#',
             fullDump=True, printResults=True, failureTests=False, postParse=None,
             file=None):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
     - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default= ``True``) prints test output to stdout
     - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
     - postParse - (default= ``None``) optional callback for successful parse results; called as
          `fn(test_string, parse_results)` and returns a string to be added to the test output
     - file - (default=``None``) optional file-like object to which test output will be written;
          if None, will default to ``sys.stdout``

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failureTests`` is True), and the results contain a list of
    ``(test_string, result)`` pairs, where result is the parse results or the
    exception raised for that test

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)
    if file is None:
        file = sys.stdout
    emit = file.write

    collected = []
    pending_comments = []
    all_ok = True
    # matches a literal backslash-n marker (outside quoted strings) and replaces it with a newline
    newline_marker = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString)
    byte_order_mark = u'\ufeff'
    for test in tests:
        is_comment = comment is not None and comment.matches(test, False)
        # blank lines are kept only while a run of comments is being collected
        if is_comment or (pending_comments and not test):
            pending_comments.append(test)
            continue
        if not test:
            continue

        report = ['\n' + '\n'.join(pending_comments) if pending_comments else '', test]
        pending_comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            test = newline_marker.transformString(test.lstrip(byte_order_mark))
            result = self.parseString(test, parseAll=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in test:
                # multi-line input: show the offending line and point at the column
                report.append(line(pe.loc, test))
                report.append(' ' * (col(pe.loc, test) - 1) + '^' + fatal)
            else:
                report.append(' ' * pe.loc + '^' + fatal)
            report.append("FAIL: " + str(pe))
            all_ok = all_ok and failureTests
            result = pe
        except Exception as exc:
            report.append("FAIL-EXCEPTION: " + str(exc))
            all_ok = all_ok and failureTests
            result = exc
        else:
            all_ok = all_ok and not failureTests
            if postParse is None:
                report.append(result.dump(full=fullDump))
            else:
                try:
                    pp_value = postParse(test, result)
                    if pp_value is None:
                        report.append(result.dump())
                    elif isinstance(pp_value, ParseResults):
                        report.append(pp_value.dump())
                    else:
                        report.append(str(pp_value))
                except Exception as e:
                    report.append(result.dump(full=fullDump))
                    report.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e))

        if printResults:
            if fullDump:
                report.append('')
            emit('\n'.join(report))

        collected.append((test, result))

    return all_ok, collected
2789
2790
2791
class _PendingSkip(ParserElement):
    # Internal placeholder that marks the spot where '...' was added to a
    # parser element. Once another ParserElement is added to it, the
    # placeholder is replaced with a SkipTo for that element.
    def __init__(self, expr, must_skip=False):
        super(_PendingSkip, self).__init__()
        # render "expr + Empty" and show the Empty as '...' for display purposes
        display = str(expr + Empty()).replace('Empty', '...')
        self.strRepr = display
        self.name = display
        self.anchor = expr
        self.must_skip = must_skip

    def __add__(self, other):
        # everything up to `other` is collected under the "_skipped" results name
        skipper = SkipTo(other).setName("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the
            # placeholder token and its results name
            if not t._skipped or t._skipped.asList() == ['']:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the anchor expression was absent: replace the empty skipped
            # text with a note naming the missing expression
            if t._skipped.asList()[-1:] == ['']:
                t.pop('_skipped')
                t['_skipped'] = 'missing <' + repr(self.anchor) + '>'

        anchored = self.anchor + skipper().addParseAction(must_skip)
        unanchored = skipper().addParseAction(show_skip)
        return (anchored | unanchored) + other

    def __repr__(self):
        return self.strRepr

    def parseImpl(self, *args):
        # a pending skip must be combined with a target before it can parse
        raise Exception("use of `...` expression without following SkipTo target expression")
2822
2823
2824
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self):
        # tokens are initialized with savelist turned off
        super(Token, self).__init__(savelist=False)
2830
2831
2832
class Empty(Token):
    """A token that matches nothing and therefore always succeeds.
    """

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        # flags read by the parsing machinery
        self.mayIndexError = False
        self.mayReturnEmpty = True
2840
2841
2842
class NoMatch(Token):
    """A token that never matches; parsing it always raises
    :class:`ParseException`.
    """

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)
2854
2855
2856
class Literal(Token):
    """Token that matches one specific string exactly.

    Example::

        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        if self.matchLen:
            # cached so parseImpl can reject on a single character comparison
            self.firstMatchChar = matchString[0]
        else:
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            # an empty literal is turned into an Empty token in place
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

        # Performance tuning: plain one-character literals switch to a
        # subclass whose parseImpl only compares a single character.
        if type(self) is Literal and self.matchLen == 1:
            self.__class__ = _SingleCharLiteral

    def parseImpl(self, instring, loc, doActions=True):
        # the first-character comparison is a cheap filter before startswith
        if instring[loc] != self.firstMatchChar or not instring.startswith(self.match, loc):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
2894
2895
class _SingleCharLiteral(Literal):
    # Literal variant for one-character match strings: comparing the single
    # character at `loc` is the whole match.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
2900
2901
_L = Literal
2902
ParserElement._literalStringClass = Literal
2903
2904
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is,
    it must be immediately followed by a non-keyword character.  Compare
    with :class:`Literal`:

     - ``Literal("if")`` will match the leading ``'if'`` in
       ``'ifAndOnlyIf'``.
     - ``Keyword("if")`` will not; it will only match the leading
       ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

     - ``identChars`` is a string of characters that would be valid
       identifier characters, defaulting to all alphanumerics + "_" and
       "$"
     - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """
    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        # Remember whether the caller supplied identifier characters, so
        # that copy() can tell custom characters from defaulted ones.
        self._customIdentChars = identChars is not None
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text throughout
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at ``loc``, requiring that the characters
        immediately before and after it (if any) are not in ``identChars``.

        Returns ``(loc + matchLen, match)`` on success and raises
        :class:`ParseException` otherwise.
        """
        if self.caseless:
            if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch)
                    and (loc >= len(instring) - self.matchLen
                         or instring[loc + self.matchLen].upper() not in self.identChars)
                    and (loc == 0
                         or instring[loc - 1].upper() not in self.identChars)):
                return loc + self.matchLen, self.match

        else:
            # cheap first-character test before the full comparison
            if instring[loc] == self.firstMatchChar:
                if ((self.matchLen == 1 or instring.startswith(self.match, loc))
                        and (loc >= len(instring) - self.matchLen
                             or instring[loc + self.matchLen] not in self.identChars)
                        and (loc == 0 or instring[loc - 1] not in self.identChars)):
                    return loc + self.matchLen, self.match

        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy of this keyword.

        Identifier characters passed explicitly to the constructor are
        carried over to the copy.  A keyword built with the default
        identifier characters picks up the current
        ``Keyword.DEFAULT_KEYWORD_CHARS`` (upper-cased for caseless
        keywords, matching what the constructor does).
        """
        c = super(Keyword, self).copy()
        if getattr(self, '_customIdentChars', False):
            # previously these were overwritten with the defaults, silently
            # changing the keyword's word-boundary behavior on every copy
            c.identChars = set(self.identChars)
        else:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
            if self.caseless:
                identChars = identChars.upper()
            c.identChars = set(identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2981
2982
class CaselessLiteral(Literal):
    """Token that matches a specified string without regard to letter case.

    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10")  # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, matchString):
        # the base class stores the upper-cased text for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # keep the literal exactly as it was defined; this is what gets returned
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
3004
3005
class CaselessKeyword(Keyword):
    """
    Case-insensitive form of :class:`Keyword`.

    Example::

        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10")  # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(self, matchString, identChars=None):
        # delegate to Keyword with caseless matching switched on
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)
3017
3018
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

     - ``match_string`` - string to be matched
     - ``maxMismatches`` - (``default=1``) maximum number of
       mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

     - ``mismatches`` - a list of the positions within the
       match_string where mismatches were found
     - ``original`` - the original match_string used to compare
       against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the input at ``loc`` with ``match_string`` character by
        character, tolerating up to ``maxMismatches`` differences.

        Returns ``(end_loc, results)`` where ``results`` holds the matched
        input text plus the ``original`` and ``mismatches`` named results.
        Raises :class:`ParseException` if too many characters differ or
        there is not enough input left to compare.
        """
        start = loc
        instrlen = len(instring)
        maxloc = start + len(self.match_string)

        if maxloc <= instrlen:
            match_string = self.match_string
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The end location is an offset into the input string, so it
                # is measured from `start`; the enumerate index alone is only
                # an offset into match_string and is wrong whenever start > 0.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
3085
3086
3087
class Word(Token):
    """Token for matching words composed of allowed character sets.
    Defined with string containing all allowed initial characters, an
    optional string containing allowed body characters (if omitted,
    defaults to the initial character set), and an optional minimum,
    maximum, and/or exact length.  The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction. An optional ``excludeChars`` parameter can
    list characters that might be found in the input ``bodyChars``
    string; useful to define a word of all printables except for one or
    two characters, for instance.

    :class:`srange` is useful for defining custom character set strings
    for defining ``Word`` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

     - :class:`alphas`
     - :class:`nums`
     - :class:`alphanums`
     - :class:`hexnums`
     - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
       - accented, tilded, umlauted, etc.)
     - :class:`punc8bit` (non-alphabetic characters in ASCII range
       128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - :class:`printables` (any non-whitespace character)

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """

    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()

        # strip excluded characters out of both character sets
        if excludeChars:
            excludeChars = set(excludeChars)
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)

        self.initCharsOrig = initChars
        self.initChars = set(initChars)

        # with no (remaining) body characters, the body uses the initial set
        body_source = bodyChars if bodyChars else initChars
        self.bodyCharsOrig = body_source
        self.bodyChars = set(body_source)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # A regular expression is only attempted when no length arguments
        # were given and neither character set contains a space.
        default_lengths = (min == 1 and max == 0 and exact == 0)
        if ' ' not in self.initCharsOrig + self.bodyCharsOrig and default_lengths:
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % (re.escape(self.initCharsOrig),
                                             _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig),
                                               _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b" + self.reString + r"\b"

            try:
                self.re = re.compile(self.reString)
            except Exception:
                # fall back to the character-by-character parseImpl below
                self.re = None
            else:
                self.re_match = self.re.match
                self.__class__ = _WordRegex

    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # consume body characters up to maxLen or the end of the input
        maxloc = min(start + self.maxLen, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        # too few characters matched
        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        # a maximum was given and the word continues past it
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            raise ParseException(instring, loc, self.errmsg, self)

        # keyword mode: a body character directly before or after the word
        # disqualifies the match
        if self.asKeyword:
            if (start > 0 and instring[start - 1] in bodychars
                    or loc < instrlen and instring[loc] in bodychars):
                raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(Word, self).__str__()
        except Exception:
            pass

        if self.strRepr is not None:
            return self.strRepr

        def charsAsStr(s):
            # abbreviate long character sets to their first four characters
            return s[:4] + "..." if len(s) > 4 else s

        if self.initCharsOrig == self.bodyCharsOrig:
            self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
        else:
            self.strRepr = "W:(%s, %s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))

        return self.strRepr
3245
3246
class _WordRegex(Word):
    # Word variant whose parseImpl relies on the compiled regular
    # expression stored in self.re_match.
    def parseImpl(self, instring, loc, doActions=True):
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.group()
3254
3255
3256
class Char(_WordRegex):
    """A short-cut class for defining ``Word(characters, exact=1)``,
    for matching any single character out of a string of characters.
    """

    def __init__(self, charset, asKeyword=False, excludeChars=None):
        super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars)
        # build a one-character class from the initial characters kept by Word
        pattern = "[%s]" % _escapeRegexRangeChars(''.join(self.initChars))
        if asKeyword:
            pattern = r"\b%s\b" % pattern
        self.reString = pattern
        self.re = re.compile(pattern)
        self.re_match = self.re.match
3268
3269
3270
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python  `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named parse results.

    If instead of the Python stdlib re module you wish to use a different RE module
    (such as the `regex` module), you can do so by building your Regex object
    with a compiled RE that was compiled using that module.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # use regex module instead of stdlib re module to construct a Regex using
        # a compiled regular expression
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))

    """

    def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.
        """
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                # warn about the offending pattern, then let the error propagate
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise

        elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'):
            # anything exposing .pattern and .match is used as a compiled RE
            self.re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError("Regex may only be constructed with a string or a compiled RE object")

        self.re_match = self.re.match

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = self.re_match("") is not None
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # choose the result shape per instance; asMatch is applied last, so
        # it wins when both options are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch

    def parseImpl(self, instring, loc, doActions=True):
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        tokens = ParseResults(found.group())
        # named groups in the regex become named results
        for group_name, group_text in found.groupdict().items():
            tokens[group_name] = group_text
        return found.end(), tokens

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        # the result is the tuple of the match's groups
        return found.end(), found.groups()

    def parseImplAsMatch(self, instring, loc, doActions=True):
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        # the result is the match object itself
        return found.end(), found

    def __str__(self):
        try:
            return super(Regex, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

    def sub(self, repl):
        r"""
        Return Regex with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transformString("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch and callable(repl):
            warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch:
            # the token is a match object: expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)
        else:
            # the token is the matched text: run the substitution on it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])
        return self.addParseAction(pa)
3409
3410
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

        - quoteChar - string of one or more characters defining the
          quote delimiting string
        - escChar - character to escape quotes, typically backslash
          (default= ``None``)
        - escQuote - special quote sequence to escape an embedded quote
          string (such as SQL's ``""`` to escape an embedded ``"``)
          (default= ``None``)
        - multiline - boolean indicating whether quotes can span
          multiple lines (default= ``False``)
        - unquoteResults - boolean indicating whether the matched text
          should be unquoted (default= ``True``)
        - endQuoteChar - string of one or more characters defining the
          end of the quote delimited string (default= ``None``  => same as
          quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace
          (``'\t'``, ``'\n'``, etc.) to actual whitespace
          (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False,
                 unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        super(QuotedString, self).__init__()

        # surrounding whitespace is stripped from the quote characters
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # The body character class excludes the first end-quote character,
        # the escape character (if any) and, for single-line strings, CR/LF.
        esc_class = ''
        if escChar is not None:
            esc_class = _escapeRegexRangeChars(escChar) or ''
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            line_breaks = ''
        else:
            self.flags = 0
            line_breaks = r'\n\r'
        self.pattern = r'%s(?:[^%s%s%s]' % (re.escape(self.quoteChar),
                                            _escapeRegexRangeChars(self.endQuoteChar[0]),
                                            line_breaks,
                                            esc_class)

        # For a multi-character end quote, also accept any proper prefix of
        # it that is followed by a character other than the next one of the
        # end quote.
        if len(self.endQuoteChar) > 1:
            partial_ends = []
            for i in range(len(self.endQuoteChar) - 1, 0, -1):
                partial_ends.append("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                                 _escapeRegexRangeChars(self.endQuoteChar[i])))
            self.pattern += '|(?:' + ')|(?:'.join(partial_ends) + ')'

        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # used by parseImpl to strip the escape character from results
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # cheap first-character test before running the regular expression
        if instring[loc] != self.firstQuoteChar:
            raise ParseException(instring, loc, self.errmsg, self)
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = found.end()
        ret = found.group()

        if not self.unquoteResults:
            return loc, ret

        # strip off quotes
        ret = ret[self.quoteCharLen: -self.endQuoteCharLen]

        if isinstance(ret, basestring):
            # replace escaped whitespace
            if self.convertWhitespaceEscapes and '\\' in ret:
                whitespace_escapes = (
                    (r'\t', '\t'),
                    (r'\n', '\n'),
                    (r'\f', '\f'),
                    (r'\r', '\r'),
                )
                for wslit, wschar in whitespace_escapes:
                    ret = ret.replace(wslit, wschar)

            # replace escaped characters
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
3559
3560
3561
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length.  The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        # whitespace is not skipped before matching, so it can be part of the match
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use "
                             "Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        notchars = self.notChars
        if instring[loc] in notchars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        # consume until a disallowed character, maxLen, or end of input
        stop = min(start + self.maxLen, len(instring))
        while loc < stop and instring[loc] not in notchars:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            # long exclusion sets are abbreviated to their first four characters
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
3635
3636
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """
    # display names used to build the name of a White expression
    whiteStrs = {
        ' ' : '<SP>',
        '\t': '<TAB>',
        '\n': '<LF>',
        '\r': '<CR>',
        '\f': '<FF>',
        u'\u00A0': '<NBSP>',
        u'\u1680': '<OGHAM_SPACE_MARK>',
        u'\u180E': '<MONGOLIAN_VOWEL_SEPARATOR>',
        u'\u2000': '<EN_QUAD>',
        u'\u2001': '<EM_QUAD>',
        u'\u2002': '<EN_SPACE>',
        u'\u2003': '<EM_SPACE>',
        u'\u2004': '<THREE-PER-EM_SPACE>',
        u'\u2005': '<FOUR-PER-EM_SPACE>',
        u'\u2006': '<SIX-PER-EM_SPACE>',
        u'\u2007': '<FIGURE_SPACE>',
        u'\u2008': '<PUNCTUATION_SPACE>',
        u'\u2009': '<THIN_SPACE>',
        u'\u200A': '<HAIR_SPACE>',
        u'\u200B': '<ZERO_WIDTH_SPACE>',
        u'\u202F': '<NNBSP>',
        u'\u205F': '<MMSP>',
        u'\u3000': '<IDEOGRAPHIC_SPACE>',
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # the characters being matched must not also be skipped as whitespace
        skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars(skippable)
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        # consume matching whitespace up to maxLen or the end of the input
        maxloc = min(start + self.maxLen, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
3704
3705
3706
class _PositionToken(Token):
    """Common base class for tokens whose match depends on the current
    parse position; the element is named after its concrete class.
    """
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
3712
3713
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Advance ``loc`` over ignorables and whitespace, stopping as soon
        as the target column is reached.
        """
        if col(loc, instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (loc < instrlen
               and instring[loc].isspace()
               and col(loc, instring) != self.col):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Consume the text between ``loc`` and the target column.

        Raises ``ParseException`` if ``loc`` is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target_loc = loc + self.col - current
        return target_loc, instring[loc:target_loc]
3737
3738
3739
class LineStart(_PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__(self):
        super(LineStart, self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3769
3770
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """
    def __init__(self):
        super(LineEnd, self).__init__()
        # a newline must remain visible to parseImpl, so it is removed from
        # this element's whitespace characters
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc`` (returned as the token) or the end of
        the input (no token); anything else raises ``ParseException``.
        """
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc + 1, "\n"
        if loc == instrlen:
            return loc + 1, []
        raise ParseException(instring, loc, self.errmsg, self)
3789
3790
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed at offset 0, or at the offset that pre-parsing from 0
        would reach (i.e. only whitespace/ignorables precede ``loc``).
        """
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3804
3805
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string
    """
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is at or beyond the end of
        ``instring``; raise ``ParseException`` when input remains.

        When ``loc`` is exactly the end of the input the returned location
        is ``loc + 1``; when it is already past the end it is returned
        unchanged.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc + 1, []
        # loc > instrlen: the three comparisons are exhaustive, so the
        # former trailing ``else: raise`` branch could never execute
        return loc, []
3821
3822
class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word,
    and is not preceded by any character in a given set of
    ``wordChars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """
    def __init__(self, wordChars=printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at offset 0, or where the previous
        character is not a word character and the current one is.
        """
        word_chars = self.wordChars
        at_word_start = (loc == 0
                         or (instring[loc - 1] not in word_chars
                             and instring[loc] in word_chars))
        if not at_word_start:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3842
3843
class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and is
    not followed by any character in a given set of ``wordChars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """
    def __init__(self, wordChars=printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is at/after the end of the
        input, or when the previous character is a word character and the
        current one is not.

        Raises ``ParseException`` otherwise, including at offset 0 of a
        non-empty input, where no word can have ended yet.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            # ``loc == 0`` is tested explicitly: ``instring[loc - 1]`` would
            # otherwise wrap around to the *last* character of the input
            if (instring[loc] in self.wordChars
                    or loc == 0
                    or instring[loc - 1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3864
3865
3866
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """
    def __init__(self, exprs, savelist=False):
        """Normalize ``exprs`` (a string, a single element, a generator or
        any other iterable) into the list ``self.exprs``; strings are
        wrapped with ``self._literalStringClass``.
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, basestring):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any bare strings in the sequence with the literal class
            self.exprs = [self._literalStringClass(item) if isinstance(item, basestring) else item
                          for item in list(exprs)]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def append(self, other):
        """Add ``other`` to the contained expressions and reset the cached
        string representation; returns ``self``.
        """
        self.exprs.append(other)
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the caller's original sub-expressions are untouched
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register ``other`` as ignorable here and on every contained
        expression; a ``Suppress`` that is already registered is skipped.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseExpression, self).ignore(other)
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            # fall back to the generated representation below
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline the contained expressions and flatten directly nested
        expressions of the same class; returns ``self``.
        """
        super(ParseExpression, self).streamline()

        for e in self.exprs:
            e.streamline()

        def can_absorb(candidate):
            # only same-class children without parse actions, results name
            # or debugging may be merged into this expression
            return (isinstance(candidate, self.__class__)
                    and not candidate.parseAction
                    and candidate.resultsName is None
                    and not candidate.debug)

        # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d)
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            first = self.exprs[0]
            if can_absorb(first):
                self.exprs = first.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError |= first.mayIndexError

            last = self.exprs[-1]
            if can_absorb(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError |= last.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def validate(self, validateTrace=None):
        """Validate every contained expression, then check this expression
        for recursion.
        """
        trace = [] if validateTrace is None else validateTrace
        tmp = trace[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion([])

    def copy(self):
        """Return a copy whose contained expressions are copies as well."""
        ret = super(ParseExpression, self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, listAllMatches=False):
        """Warn (when the diagnostic is enabled) about contained expressions
        that already carry a results name, then delegate to the base class.
        """
        if __diag__.warn_ungrouped_named_tokens_in_collection:
            for e in self.exprs:
                if not (isinstance(e, ParserElement) and e.resultsName):
                    continue
                message = ("{0}: setting results name {1!r} on {2} expression "
                           "collides with {3!r} on contained expression").format(
                    "warn_ungrouped_named_tokens_in_collection",
                    name,
                    type(self).__name__,
                    e.resultsName)
                warnings.warn(message, stacklevel=3)

        return super(ParseExpression, self)._setResultsName(name, listAllMatches)
3985
3986
3987
class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element (named ``'-'``); once ``And.parseImpl`` passes
        it, failures of later expressions become ``ParseSyntaxException``.
        """
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist=True):
        """Build the sequence from ``exprs``.

        Each ``Ellipsis`` entry is replaced by a ``SkipTo`` targeting the
        expression that follows it; an ``Ellipsis`` in last position raises
        ``Exception``. An empty ``exprs`` is accepted and yields an element
        that may return empty.
        """
        exprs = list(exprs)
        if exprs and Ellipsis in exprs:
            tmp = []
            for i, expr in enumerate(exprs):
                if expr is Ellipsis:
                    if i < len(exprs) - 1:
                        # adding to Empty() converts a bare string into an element
                        skipto_arg = (Empty() + exprs[i + 1]).exprs[-1]
                        tmp.append(SkipTo(skipto_arg)("_skipped*"))
                    else:
                        raise Exception("cannot construct And with sequence ending in ...")
                else:
                    tmp.append(expr)
            exprs[:] = tmp
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        if self.exprs:
            # whitespace handling follows the first expression; guarded so
            # that an empty expression list no longer raises IndexError
            self.setWhitespaceChars(self.exprs[0].whiteChars)
            self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def streamline(self):
        """Merge pending skips with the expression that follows them, then
        apply the base-class streamlining; returns ``self``.
        """
        # collapse any _PendingSkip's
        if self.exprs:
            if any(isinstance(e, ParseExpression) and e.exprs and isinstance(e.exprs[-1], _PendingSkip)
                   for e in self.exprs[:-1]):
                for i, e in enumerate(self.exprs[:-1]):
                    if e is None:
                        continue
                    if (isinstance(e, ParseExpression)
                            and e.exprs and isinstance(e.exprs[-1], _PendingSkip)):
                        e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                        self.exprs[i + 1] = None
                self.exprs = [e for e in self.exprs if e is not None]

        super(And, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order, accumulating tokens.

        Returns ``(loc, tokens)``. After an ``_ErrorStop`` marker, any
        ``ParseBaseException`` or ``IndexError`` from a later expression is
        raised as ``ParseSyntaxException``.
        """
        if not self.exprs:
            # an empty sequence matches trivially without consuming input
            return loc, []

        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # And([self, other])

    def checkRecursion(self, parseElementList):
        """Check contained expressions for recursion, stopping after the
        first one that cannot return empty.
        """
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
4094
4095
4096
class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all the element is flagged as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def streamline(self):
        super(Or, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and return the result of the longest match.

        If nothing matches, re-raises the failure that got furthest into
        the input with this element's error message, or a "no defined
        alternatives" ``ParseException`` when no failure was recorded.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        for alternative in self.exprs:
            try:
                end_loc = alternative.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, alternative))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                return candidates[0][1]._parse(instring, loc, doActions)

            longest = -1, None
            for expected_loc, alternative in candidates:
                if expected_loc <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_loc, toks = alternative._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                    continue

                if actual_loc >= expected_loc:
                    return actual_loc, toks
                # didn't match as much as before
                if actual_loc > longest[0]:
                    longest = actual_loc, toks

            if longest != (-1, None):
                return longest

        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # Or([self, other])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)

    def _setResultsName(self, name, listAllMatches=False):
        """Optionally warn that naming an alternation containing an ``And``
        may capture only a single token, then delegate to the base class.
        """
        if (not __compat__.collect_all_And_tokens
                and __diag__.warn_multiple_tokens_in_named_alternation
                and any(isinstance(e, And) for e in self.exprs)):
            message = ("{0}: setting results name {1!r} on {2} expression "
                       "may only return a single token for an And alternative, "
                       "in future will return the full list of tokens").format(
                "warn_multiple_tokens_in_named_alternation", name, type(self).__name__)
            warnings.warn(message, stacklevel=3)

        return super(Or, self)._setResultsName(name, listAllMatches)
4217
4218
4219
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives at all the element is flagged as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def streamline(self):
        super(MatchFirst, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        If none does, re-raises the failure that got furthest into the
        input with this element's error message, or a "no defined
        alternatives" ``ParseException`` when no failure was recorded.
        """
        furthest_loc = -1
        furthest_exc = None
        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthest_loc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # MatchFirst([self, other])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)

    def _setResultsName(self, name, listAllMatches=False):
        """Optionally warn that naming an alternation containing an ``And``
        may capture only a single token, then delegate to the base class.
        """
        if (not __compat__.collect_all_And_tokens
                and __diag__.warn_multiple_tokens_in_named_alternation
                and any(isinstance(e, And) for e in self.exprs)):
            message = ("{0}: setting results name {1!r} on {2} expression "
                       "may only return a single token for an And alternative, "
                       "in future will return the full list of tokens").format(
                "warn_multiple_tokens_in_named_alternation", name, type(self).__name__)
            warnings.warn(message, stacklevel=3)

        return super(MatchFirst, self)._setResultsName(name, listAllMatches)
4303
4304
4305
class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the required/optional groupings are computed on the first parse
        self.initExprGroups = True
        self.saveAsList = True

    def streamline(self):
        super(Each, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expressions in whatever order the input
        presents them.

        A trial pass (``tryParse``) discovers the order; the expressions are
        then parsed for real in that order and their results summed. Raises
        ``ParseException`` if any required expression is never matched.
        """
        if self.initExprGroups:
            # classify the contained expressions once and cache the groups
            self.opt1map = {id(e.expr): e for e in self.exprs if isinstance(e, Optional)}
            wrapped_optionals = [e.expr for e in self.exprs if isinstance(e, Optional)]
            empty_capable = [e for e in self.exprs
                             if e.mayReturnEmpty and not isinstance(e, (Optional, Regex))]
            self.optionals = wrapped_optionals + empty_capable
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs
                             if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        probe_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        matchOrder = []

        # keep sweeping until a full pass matches nothing
        while True:
            attempt = pending_required + pending_optional + self.multioptionals + self.multirequired
            failures = 0
            for e in attempt:
                try:
                    probe_loc = e.tryParse(instring, probe_loc)
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(self.opt1map.get(id(e), e))
                    if e in pending_required:
                        pending_required.remove(e)
                    elif e in pending_optional:
                        pending_optional.remove(e)
            if failures == len(attempt):
                break

        if pending_required:
            missing = ", ".join(_ustr(e) for e in pending_required)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in pending_optional]

        resultlist = []
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            resultlist.append(results)

        return loc, sum(resultlist, ParseResults([]))

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
4436
4437
4438
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """
    def __init__(self, expr, savelist=False):
        """Wrap ``expr`` (an element, a string, or ``None``).

        A string is converted with ``self._literalStringClass``; when
        ``expr`` is not ``None`` its parsing flags, whitespace settings and
        ignore expressions are copied onto this wrapper.
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            if issubclass(self._literalStringClass, Token):
                expr = self._literalStringClass(expr)
            else:
                expr = self._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression; raise ``ParseException`` if
        there is none.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping here and on a copy of the wrapped
        expression; returns ``self``.
        """
        self.skipWhitespace = False
        # test for None *before* copying: previously ``self.expr.copy()``
        # ran first and raised AttributeError when no expression was set
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register ``other`` as ignorable here and on the wrapped
        expression; a ``Suppress`` that is already registered is skipped.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super(ParseElementEnhance, self).ignore(other)
                if self.expr is not None:
                    self.expr.ignore(self.ignoreExprs[-1])
        else:
            super(ParseElementEnhance, self).ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element is already in
        ``parseElementList``; otherwise continue into the wrapped expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None):
        """Validate the wrapped expression, then check this element for
        recursion.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # fall back to the generated representation below
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
4515
4516
4517
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the lookahead expression at ``loc`` and return ``loc``
        unchanged together with the emptied results.
        """
        # parse with the wrapped expression, then empty the token list in
        # place so that any named results defined in it are kept
        lookahead = self.expr._parse(instring, loc, doActions=doActions)
        ret = lookahead[1]
        del ret[:]

        return loc, ret
4550
4551
4552
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

     - expr - expression that must match prior to the current parse
       location
     - retreat - (default= ``None``) - (int) maximum number of characters
       to lookbehind prior to the current parse location

    If the lookbehind expression is a string, Literal, Keyword, or
    a Word or CharsNotIn with a specified exact or maximum length, then
    the retreat parameter is not required. Otherwise, retreat must be
    specified to give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """
    def __init__(self, expr, retreat=None):
        super(PrecededBy, self).__init__(expr)
        # NOTE(review): calling the element with no arguments presumably
        # returns a copy - confirm against ParserElement.__call__
        self.expr = self.expr().leaveWhitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.exact = False
        # ``basestring`` (as used throughout this module) rather than
        # ``str``, so a Python 2 ``unicode`` pattern also gets its retreat
        # width instead of leaving ``retreat`` as None
        if isinstance(expr, basestring):
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, _PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False
        # empty the token list after a successful match
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Verify that the wrapped expression matches immediately before
        ``loc``; returns ``loc`` unchanged with the lookbehind results.

        In exact mode a single attempt is made ``self.retreat`` characters
        back; otherwise each offset inside the lookbehind window is tried
        until one matches up to ``loc``. Raises the last failure when no
        attempt succeeds.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            # retreat specified a maximum lookbehind window, iterate
            test_expr = self.expr + StringEnd()
            instring_slice = instring[max(0, loc - self.retreat):loc]
            last_expr = ParseException(instring, loc, self.errmsg)
            for offset in range(1, min(loc, self.retreat + 1) + 1):
                try:
                    _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset)
                except ParseBaseException as pbe:
                    last_expr = pbe
                else:
                    break
            else:
                # no offset produced a match
                raise last_expr
        return loc, ret
4625
4626
4627
class NotAny(ParseElementEnhance):
    """Negative lookahead: succeed only if the given expression does
    *not* match at the current position.

    ``NotAny`` does *not* advance the parsing position within the
    input string, and it does *not* skip over leading whitespace.
    ``NotAny`` always returns a null token list.  May be constructed
    using the '~' operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Optional(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infixNotation
        boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # set the flag directly instead of calling self.leaveWhitespace(),
        # which would also propagate to the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc, [])`` unless the contained expression can parse at ``loc``."""
        if not self.expr.canParseNext(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr
        return self.name
4670
4671
class _MultipleMatch(ParseElementEnhance):
    """Repetition of a contained expression, with an optional ``stopOn``
    sentinel; base class of :class:`OneOrMore` and :class:`ZeroOrMore`.
    """
    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        # stopOn() itself converts a string sentinel into an expression
        self.stopOn(stopOn)

    def stopOn(self, ender):
        """Set (or clear, with ``None``) the terminating sentinel; returns ``self``."""
        if isinstance(ender, basestring):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expression once, then as many more times as possible.

        A failure of the first match (or a sentinel found before it)
        propagates to the caller; later failures simply end the repetition.
        """
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_ender = None
        if self.not_ender is not None:
            reject_ender = self.not_ender.tryParse

        # must be at least one (but first see if we are at the stopOn
        # sentinel; if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_item(instring, loc, doActions, callPreParse=False)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, preloc, doActions)
                if more_tokens or more_tokens.haskeys():
                    tokens += more_tokens
        except (ParseException, IndexError):
            # no further repetition could be matched
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        if __diag__.warn_ungrouped_named_tokens_in_collection:
            candidates = [self.expr] + getattr(self.expr, 'exprs', [])
            for e in candidates:
                if not (isinstance(e, ParserElement) and e.resultsName):
                    continue
                warnings.warn("{0}: setting results name {1!r} on {2} expression "
                              "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection",
                                                                                   name,
                                                                                   type(self).__name__,
                                                                                   e.resultsName),
                              stacklevel=3)

        return super(_MultipleMatch, self)._setResultsName(name, listAllMatches)
4727
4728
4729
class OneOrMore(_MultipleMatch):
    """Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr
        return self.name
4763
4764
class ZeroOrMore(_MultipleMatch):
    """Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to :class:`OneOrMore`
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match like the base class, but report no match as ``(loc, [])``."""
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            # zero repetitions is a valid result
            outcome = (loc, [])
        return outcome

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr
        return self.name
4793
4794
4795
class _NullToken(object):
4796
def __bool__(self):
4797
return False
4798
__nonzero__ = __bool__
4799
def __str__(self):
4800
return ""
4801
4802
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that may match at most once
     - default (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    # sentinel meaning "no default value was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(self, expr, default=__optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the contained expression; on failure return the default (if any) instead."""
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is self.__optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # also expose the default under the contained expression's results name
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name
4869
4870
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default= ``False``) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default= ``None``) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default= ``None``) define expressions that are not allowed to be
          included in the skipped text; scanning stops at the first position
          where this expression matches, so the target expression is not
          found at or beyond that point

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        # a string failOn is promoted to a literal expression
        if isinstance(failOn, basestring):
            failOn = self._literalStringClass(failOn)
        self.failOn = failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` for the target expression.

        Returns the new location and a result holding the skipped text
        (plus the target's tokens when ``include`` was requested).
        Raises ParseException if the end of the input is passed without
        the target matching.
        """
        startloc = loc
        instrlen = len(instring)
        target_parse = self.expr._parse
        fail_on_matches = self.failOn.canParseNext if self.failOn is not None else None
        skip_ignored = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            # stop scanning at the first position where failOn matches;
            # note this leaves the loop without raising here
            if fail_on_matches is not None and fail_on_matches(instring, tmploc):
                break

            if skip_ignored is not None:
                # advance past any run of ignore expressions
                while True:
                    try:
                        tmploc = skip_ignored(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                target_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match here, try one character further on
                tmploc += 1
            else:
                # found the target expression
                break
        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skipresult = ParseResults(instring[startloc:loc])

        if self.includeMatch:
            loc, mat = target_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
4987
4988
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the '<<' operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, '|' has a lower precedence than '<<', so that::

        fwdExpr << a | b | c

    will actually be evaluated as::

        (fwdExpr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwdExpr << (a | b | c)

    Converting to use the '<<=' operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Install ``other`` as the contained expression and return ``self``.

        A string is converted with the literal string class; parsing
        attributes of the new expression are copied onto this object.
        """
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        self.expr = other
        # drop any cached string form built for a previous expression
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """``fwd <<= expr`` is equivalent to ``fwd << expr``."""
        return self << other

    def leaveWhitespace(self):
        """Disable whitespace skipping on this object only; returns ``self``."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        # the streamlined flag is set before descending so that a
        # recursive grammar does not loop back through this object
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        if validateTrace is None:
            validateTrace = []

        # only descend if this object is not already on the trace
        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name
        if self.strRepr is not None:
            return self.strRepr

        # Avoid infinite recursion by setting a temporary strRepr
        self.strRepr = ": ..."

        # Use the string representation of main expression.
        retString = '...'
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)[:1000]
            else:
                retString = "None"
        finally:
            self.strRepr = self.__class__.__name__ + ": " + retString
        return self.strRepr

    def copy(self):
        """Return a copy; an undefined Forward yields a new Forward that refers to this one."""
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, listAllMatches=False):
        if __diag__.warn_name_set_on_empty_Forward:
            if self.expr is None:
                # format arguments are (diagnostic name, results name, class
                # name): the results name is field 1 and the class is field 2
                warnings.warn("{0}: setting results name {1!r} on {2} expression "
                              "that has no contained expression".format("warn_name_set_on_empty_Forward",
                                                                        name,
                                                                        type(self).__name__),
                              stacklevel=3)

        return super(Forward, self)._setResultsName(name, listAllMatches)
5093
5094
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # savelist is accepted for signature compatibility but is not
        # forwarded to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
5101
5102
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True
        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True

    def ignore(self, other):
        """Register an ignorable expression; returns ``self``."""
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            # use the base ParserElement implementation directly when the
            # combined tokens must be adjacent
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with a single joined string token."""
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
5146
5147
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a, b, 100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a, b, 100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens in a one-element list."""
        grouped = [tokenlist]
        return grouped
5168
5169
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Register each non-empty token group under its first token as key."""
        for i, tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue

            ikey = tok[0]
            if isinstance(ikey, int):
                # integer keys are stored in their stripped string form
                ikey = _ustr(tok[0]).strip()

            if len(tok) == 1:
                # key only, no value
                value = ""
            elif len(tok) == 2 and not isinstance(tok[1], ParseResults):
                # key followed by one plain value
                value = tok[1]
            else:
                remainder = tok.copy()
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.haskeys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[ikey] = _ParseResultsWithOffset(value, i)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
5235
5236
5237
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']

    (See also :class:`delimitedList`.)
    """
    def postParse(self, instring, loc, tokenlist):
        """Discard whatever was matched."""
        nothing = []
        return nothing

    def suppress(self):
        """Already suppressing; return this same object."""
        return self
5264
5265
5266
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.

    After the wrapped action has returned once, any further call raises
    :class:`ParseException` until :meth:`reset` is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped parse action, or raise if it has already run."""
        if not self.called:
            results = self.callable(s, l, t)
            # only marked as called once the action returns without raising
            self.called = True
            return results
        # a non-empty message so the failure can be diagnosed
        raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")

    def reset(self):
        """Allow the wrapped parse action to be called again."""
        self.called = False
5280
5281
def traceParseAction(f):
    """Decorator for debugging parse actions.

    When the parse action is called, this decorator will print
    ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``.
    When the parse action completes, the decorator will print
    ``"<<"`` followed by the returned value, or any exception that the parse action raised.

    Example::

        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        label = f.__name__
        if len(paArgs) > 3:
            # an extra leading argument is shown as the owning class
            label = paArgs[0].__class__.__name__ + '.' + label
        trace = sys.stderr.write
        trace(">>entering %s(line: '%s', %d, %r)\n" % (label, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            trace("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        trace("<<leaving %s (ret: %r)\n" % (label, ret))
        return ret

    # present the wrapper under the wrapped function's name when possible
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
5325
5326
#
5327
# global helpers
5328
#
5329
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    Example::

        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    dlName = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..."
    if not combine:
        # delimiters are matched but kept out of the results
        return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName)
    # one combined token, delimiters included
    return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName)
5349
5350
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``intExpr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # redefine the array expression to expect exactly n items, then
        # drop the count token itself from the results
        n = t[0]
        arrayExpr << (Group(And([expr] * n)) if n else Group(empty))
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return (intExpr + arrayExpr).setName('(len) ' + _ustr(expr) + '...')
5385
5386
def _flatten(L):
5387
ret = []
5388
for i in L:
5389
if isinstance(i, list):
5390
ret.extend(_flatten(i))
5391
else:
5392
ret.append(i)
5393
return ret
5394
5395
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`matchPreviousExpr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match them as a
            # sequence of literals
            rep << And(Literal(tt) for tt in _flatten(t.asList()))

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
5424
5425
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of expr, then compares tokens
    rep <<= expr.copy()

    def copyTokenToRepeater(s, l, t):
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                raise ParseException('', 0, '')

        # each new match of expr replaces the comparison action on rep
        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
5453
5454
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters ``\\ ^ - [ ]`` in ``s`` and spell
    newline and tab as the two-character sequences ``\\n`` and ``\\t``.
    """
    # the backslash goes first so that the backslashes added for the
    # other characters are not escaped again
    for special in ("\\", "^", "-", "[", "]"):
        s = s.replace(special, _bslash + special)
    return _ustr(s.replace("\n", r"\n").replace("\t", r"\t"))
5461
5462
def oneOf(strs, caseless=False, useRegex=True, asKeyword=False):
    """Helper to quickly define a set of alternative Literals, and makes
    sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

     - strs - a string of space-delimited literals, or a collection of
       string literals
     - caseless - (default= ``False``) - treat all literals as
       caseless
     - useRegex - (default= ``True``) - as an optimization, will
       generate a Regex object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
       creating a :class:`Regex` raises an exception)
     - asKeyword - (default=``False``) - enforce Keyword-style matching on the
       generated expressions

    Example::

        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    if isinstance(caseless, basestring):
        warnings.warn("More than one string argument passed to oneOf, pass "
                      "choices as a list or space-delimited string", stacklevel=2)

    # pick the comparison helpers and element class for the matching mode
    if caseless:
        def isequal(a, b):
            return a.upper() == b.upper()

        def masks(a, b):
            return b.upper().startswith(a.upper())

        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        def isequal(a, b):
            return a == b

        def masks(a, b):
            return b.startswith(a)

        parseElementClass = Keyword if asKeyword else Literal

    if isinstance(strs, basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        symbols = []
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                      SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    if not asKeyword:
        # if not producing keywords, need to reorder to take care to avoid
        # masking longer choices with shorter ones
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for pos, other in enumerate(symbols[i + 1:], i + 1):
                if isequal(other, cur):
                    # duplicate of the current symbol: drop it and rescan
                    del symbols[pos]
                    break
                if masks(cur, other):
                    # cur is a prefix of other: move other ahead of cur
                    symbols.insert(i, symbols.pop(pos))
                    break
            else:
                i += 1

    if not (caseless or asKeyword) and useRegex:
        try:
            if len(symbols) == len("".join(symbols)):
                # every symbol is a single character: use a character class
                pattern = "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
                return Regex(pattern).setName(' | '.join(symbols))
            pattern = "|".join(re.escape(sym) for sym in symbols)
            return Regex(pattern).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
5548
5549
def dictOf(key, value):
    """Build a dictionary-style expression from a key pattern and a value pattern.

    Combines :class:`Dict`, :class:`OneOrMore` and :class:`Group` in the
    required order: every ``key + value`` pair is grouped, one or more
    such groups are matched, and the whole is wrapped in a :class:`Dict`.
    The key pattern may contain delimiting markers or punctuation as long
    as they are suppressed, so that only the significant key text is
    left. The value pattern may carry results names of its own, in which
    case the :class:`Dict` results expose those named fields too.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one grouped [key, value...] entry per match
    entry = Group(key + value)
    return Dict(OneOrMore(entry))
5587
5588
def originalTextFor(expr, asString=True):
    """Return the original, untokenized input text matched by ``expr``.

    Useful to turn the parsed fields of an HTML start tag back into the
    raw tag text, or to collapse separate tokens with intervening
    whitespace back into the exact slice of input that was matched. By
    default the result is a string containing the original parsed text.

    If the optional ``asString`` argument is passed as ``False``, the
    parse action instead rewrites the :class:`ParseResults` in place so
    that it holds a single token with the original matched text, keeping
    any results names that were matched. So if the expression passed to
    :class:`originalTextFor` contains expressions with defined results
    names, you must set ``asString`` to ``False`` if you want to
    preserve those results name values.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    def report_location(s, loc, t):
        return loc

    # zero-width markers recording where the match starts and ends
    start_marker = Empty().setParseAction(report_location)
    end_marker = start_marker.copy()
    # the end marker must not skip whitespace/ignorables before reporting
    end_marker.callPreparse = False
    matchExpr = start_marker("_original_start") + expr + end_marker("_original_end")

    def text_as_string(s, l, t):
        return s[t._original_start: t._original_end]

    def text_in_results(s, l, t):
        # drop the two helper names and replace all tokens with the raw text
        begin = t.pop('_original_start')
        finish = t.pop('_original_end')
        t[:] = [s[begin:finish]]

    matchExpr.setParseAction(text_as_string if asString else text_in_results)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
5629
5630
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.

    Wraps ``expr`` in a :class:`TokenConverter` whose parse action
    returns only the first token of the match.
    """
    def first_token(t):
        return t[0]

    converter = TokenConverter(expr)
    return converter.addParseAction(first_token)
5635
5636
def locatedExpr(expr):
    """Decorate a returned token with its starting and ending locations
    in the input string.

    The returned :class:`Group` carries these results names:

    - locn_start = location where matched expression begins
    - locn_end = location where matched expression ends
    - value = the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    start_locator = Empty().setParseAction(lambda s, l, t: l)
    # the end locator must not skip whitespace, so it reports where the
    # match actually stopped
    end_locator = start_locator.copy().leaveWhitespace()
    return Group(start_locator("locn_start") + expr("value") + end_locator("locn_end"))
5663
5664
5665
# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

# Grammar for a regex-style "[...]" character class, used by srange().

# backslash followed by one of the listed punctuation characters -> the bare character
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(lambda s, l, t: t[0][1])

# "\x##" (or legacy "\0x##") hex escape -> the character with that code point.
# The digits are taken as everything after the x/X marker.  Stripping with
# str.lstrip(r'\0x') is wrong here: lstrip removes a *set of characters*, so
# it also ate leading zero digits ("\x00" became "" and int() failed) and
# never removed an upper-case "X" even though the pattern accepts it.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(
    lambda s, l, t: unichr(int(t[0].lower().partition('x')[2], 16)))

# "\0###" octal escape -> the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s, l, t: unichr(int(t[0][1:], 8)))

# any single member of the class: an escape, or one character other than "\" and "]"
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)

# "a-z" style range, grouped so it can be told apart from single characters
_charRange = Group(_singleChar + Suppress("-") + _singleChar)

_reBracketExpr = (Literal("[")
                  + Optional("^").setResultsName("negate")
                  + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body")
                  + "]")
5678
5679
def srange(s):
    r"""Expand a regex-style ``[]`` character class into a plain string,
    for use in Word construction::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    Any exception raised while parsing or expanding ``s`` is swallowed
    and an empty string is returned instead.
    """
    def expand(part):
        # single characters pass through unchanged; a grouped pair is a
        # range and expands to every character from first to last inclusive
        if not isinstance(part, ParseResults):
            return part
        low = ord(part[0])
        high = ord(part[1])
        return ''.join(unichr(code) for code in range(low, high + 1))

    try:
        members = _reBracketExpr.parseString(s).body
        return "".join(expand(member) for member in members)
    except Exception:
        return ""
5710
5711
def matchOnlyAtCol(n):
    """Create a parse action that only accepts a match located at
    column ``n`` of the input text.

    The returned callable raises :class:`ParseException` when the
    column computed for the match location differs from ``n``;
    otherwise it returns ``None`` and leaves the tokens untouched.
    """
    def verifyCol(strg, locn, toks):
        column = col(locn, strg)
        if column != n:
            raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol
5719
5720
def replaceWith(replStr):
    """Create a parse action that ignores the matched tokens and returns
    the literal value ``replStr`` (wrapped in a one-element list).
    Especially useful when used with
    :class:`transformString<ParserElement.transformString>` ().

    Example::

        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    # a fresh list is built on every call so callers never share one
    return lambda _s, _loc, _toks: [replStr]
5734
5735
def removeQuotes(s, l, t):
    """Parse action that strips the first and last character (the
    quotation marks) from the first token of a parsed quoted string.

    Example::

        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
5749
5750
def tokenMap(func, *args):
    """Create a parse action that applies ``func`` to every element of a
    ParseResults list and returns the results as a new list. Any
    additional ``args`` are forwarded to ``func`` after the token, as in
    ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (its ``__name__``,
    else its class name, else ``str(func)``).

    Example (compare the last to example in :class:`ParserElement.transformString`)::

        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s, l, t):
        converted = []
        for token in t:
            converted.append(func(token, *args))
        return converted

    # pick a readable name for the action; fall back to str(func) if the
    # attribute lookups themselves misbehave
    try:
        class_name = func.__class__.__name__
        label = getattr(func, '__name__', class_name)
    except Exception:
        label = str(func)
    pa.__name__ = label

    return pa
5797
5798
upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case.
Deprecated in favor of :class:`pyparsing_common.upcaseTokens`"""

downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case.
Deprecated in favor of :class:`pyparsing_common.downcaseTokens`"""
5805
5806
def _makeTags(tagStr, xml,
              suppress_LT=Suppress("<"),
              suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name

    ``tagStr`` is either a tag name string (turned into a :class:`Keyword`,
    caseless unless ``xml`` is true) or an expression whose ``name`` is
    used for naming the results. Returns the ``(openTag, closeTag)`` pair.
    """
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    # title-cased tag name with namespace colons removed, e.g. "ns:tag" -> "NsTag"
    name_suffix = "".join(resname.replace(":", " ").title().split())

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values are always double-quoted and always present
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attribute = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        # HTML: values may use either quote style or be unquoted, and may be
        # omitted altogether; attribute names are folded to lower case
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printables, excludeChars=">")
        attribute = Group(tagAttrName.setParseAction(downcaseTokens)
                          + Optional(Suppress("=") + tagAttrValue))

    # "empty" is True for a self-closing tag ("<tag/>"), False otherwise
    empty_marker = Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/')

    openTag = (suppress_LT
               + tagStr("tag")
               + Dict(ZeroOrMore(attribute))
               + empty_marker
               + suppress_GT)
    closeTag = Combine(_L("</") + tagStr + ">", adjacent=False)

    openTag.setName("<%s>" % resname)

    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    def add_start_name(t):
        t["start" + name_suffix] = t.copy()

    openTag.addParseAction(add_start_name)
    closeTag = closeTag("end" + name_suffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
5842
5843
def makeHTMLTags(tagStr):
    """Construct opening and closing tag expressions for HTML, given a
    tag name. Matches tags in either upper or lower case, attributes
    with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    tag_pair = _makeTags(tagStr, xml=False)
    return tag_pair
5866
5867
def makeXMLTags(tagStr):
    """Construct opening and closing tag expressions for XML, given a
    tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`makeHTMLTags`
    """
    tag_pair = _makeTags(tagStr, xml=True)
    return tag_pair
5874
5875
def withAttribute(*args, **attrDict):
    """Create a validating parse action to be used with start tags
    created with :class:`makeXMLTags` or :class:`makeHTMLTags`. Use
    ``withAttribute`` to qualify a starting tag with a required
    attribute value, to avoid false matches on common tags such as
    ``<TD>`` or ``<DIV>``.

    Call ``withAttribute`` with a series of attribute names and
    values. Specify the list of filter attributes names and values as:

    - keyword arguments, as in ``(align="right")``, or
    - as an explicit dict with ``**`` operator, when an attribute
      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    When positional name-value tuples are given, any keyword arguments
    are ignored.

    For attribute names with a namespace prefix, you must use the second
    form. Attribute names are matched insensitive to upper/lower case.

    If just testing for ``class`` (with or without a namespace), use
    :class:`withClass`.

    To verify that the attribute exists, but without specifying a value,
    pass ``withAttribute.ANY_VALUE`` as the value.

    Example::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional pairs win over keyword arguments; unpacking here also
    # rejects malformed pairs up front rather than at parse time
    required = [(name, value) for name, value in (args or attrDict.items())]

    def pa(s, l, tokens):
        for attrName, attrValue in required:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                found = tokens[attrName]
                if found != attrValue:
                    raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
                                         (attrName, found, attrValue))

    return pa

# sentinel meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
5945
5946
def withClass(classname, namespace=''):
    """Simplified version of :class:`withAttribute` when matching on a
    div class - made difficult because ``class`` is a reserved word in
    Python. When ``namespace`` is non-empty the attribute checked is
    ``"<namespace>:class"``, otherwise plain ``"class"``.

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attr_name = "%s:class" % namespace
    else:
        attr_name = "class"
    return withAttribute(**{attr_name: classname})
5983
5984
# associativity markers for infixNotation operator definitions
opAssoc = SimpleNamespace()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def infixNotation(baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')')):
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary
    or binary, left- or right-associative.  Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infixNotation. See
    :class:`ParserElement.enablePackrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the
       nested expression grammar
     - opList - list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(opExpr,
       numTerms, rightLeftAssoc, parseAction)``, where:

       - opExpr is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if numTerms
         is 3, opExpr is a tuple or list of two expressions, for the two
         operators separating the 3 terms
       - numTerms is the number of terms for this operator (must be 1,
         2, or 3)
       - rightLeftAssoc is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``opAssoc.RIGHT`` and ``opAssoc.LEFT``.
       - parseAction is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``setParseAction(*fn)``
         (:class:`ParserElement.setParseAction`)
     - lpar - expression for matching left-parentheses
       (default= ``Suppress('(')``)
     - rpar - expression for matching right-parentheses
       (default= ``Suppress(')')``)

    Raises:
     - ValueError - if numTerms is 3 and opExpr is not a tuple or list
       of exactly two expressions, if numTerms is not 1, 2 or 3, or if
       rightLeftAssoc is neither ``opAssoc.LEFT`` nor ``opAssoc.RIGHT``

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.tryParse(instring, loc)
            return loc, []

    ret = Forward()
    lastExpr = baseExpr | (lpar + ret + rpar)
    for operDef in opList:
        # pad with None so the parse action member may be omitted
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None, ))[:4]
        if arity == 3:
            # validate *before* building the term name: formatting
            # "%s%s term" directly from opExpr raised TypeError for None,
            # for a list, or for a wrong-length tuple, hiding this error
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % opExpr
        thisExpr = Forward().setName(termName)
        # each level is "lookahead + grouped match"; the _FB lookahead
        # avoids grouping a lone operand that has no operator after it
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr))
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr))
                else:
                    # no operator expression: operands are simply adjacent
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr + OneOrMore(lastExpr))
            elif arity == 3:
                matchExpr = (_FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr)
                             + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)))
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr))
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr))
            elif arity == 3:
                matchExpr = (_FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
                             + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr))
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this level matches its own operator form, else falls back to
        # the next-tighter level
        thisExpr <<= (matchExpr.setName(termName) | lastExpr)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
6116
6117
operatorPrecedence = infixNotation
"""(Deprecated) Former name of :class:`infixNotation`, will be
dropped in a future release."""

# Quoted-string expressions.  Inside the quotes the body accepts ordinary
# characters (no newline, no backslash), a doubled quote character, or a
# backslash escape (including "\x" followed by hex digits).
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).setName("string enclosed in double quotes")
sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("string enclosed in single quotes")
quotedString = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"')
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'")
).setName("quotedString using single or double quotes")
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
6126
6127
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Define nested lists enclosed in opening and closing delimiters
    ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression
     - closer - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing
       delimiters (default= :class:`quotedString`)

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values. In that case
    ``opener`` and ``closer`` must both be strings.

    Use the ``ignoreExpr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quotedString or
    a comment expression.  Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quotedString`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Raises ``ValueError`` if ``opener`` equals ``closer``, or if no
    ``content`` is given and the delimiters are not both strings.

    Example::

        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        def strip_token(t):
            return t[0].strip()

        white = ParserElement.DEFAULT_WHITE_CHARS
        one_char_delims = len(opener) == 1 and len(closer) == 1

        if one_char_delims and ignoreExpr is not None:
            # single-character delimiters can be excluded by character set
            content = Combine(OneOrMore(~ignoreExpr
                                        + CharsNotIn(opener + closer + white, exact=1))
                              ).setParseAction(strip_token)
        elif one_char_delims:
            content = (empty.copy()
                       + CharsNotIn(opener + closer + white).setParseAction(strip_token))
        elif ignoreExpr is not None:
            # multi-character delimiters need negative lookahead instead
            content = Combine(OneOrMore(~ignoreExpr
                                        + ~Literal(opener)
                                        + ~Literal(closer)
                                        + CharsNotIn(white, exact=1))
                              ).setParseAction(strip_token)
        else:
            content = Combine(OneOrMore(~Literal(opener)
                                        + ~Literal(closer)
                                        + CharsNotIn(white, exact=1))
                              ).setParseAction(strip_token)

    ret = Forward()
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    # recursive definition: a group may contain further nested groups
    ret <<= Group(Suppress(opener) + ZeroOrMore(item) + Suppress(closer))
    ret.setName('nested %s%s expression' % (opener, closer))
    return ret
6230
6231
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Define space-delimited indentation blocks, such as those used to
    define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single
       grammar should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    A snapshot of ``indentStack`` is taken when this function is called;
    if the returned expression fails to match, the stack is restored to
    that snapshot. ``blockStatementExpr`` is also modified in place to
    ignore backslash line continuations.

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Optional(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    backup_stack = indentStack[:]

    def reset_stack():
        # restore the caller's list in place so every holder sees it
        indentStack[:] = backup_stack

    def checkPeerIndent(s, l, t):
        # at end of input there is nothing left to compare
        if l >= len(s):
            return
        curCol = col(l, s)
        expected = indentStack[-1]
        if curCol == expected:
            return
        if curCol > expected:
            raise ParseException(s, l, "illegal nesting")
        raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        curCol = col(l, s)
        if not curCol > indentStack[-1]:
            raise ParseException(s, l, "not a subentry")
        indentStack.append(curCol)

    def checkUnindent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        # an unindent must land on a column already on the stack
        if not indentStack or curCol not in indentStack:
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress(), stopOn=StringEnd())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # optional blank lines, then (when required) the deeper-indent check
    preamble = Optional(NL)
    if indent:
        preamble = preamble + INDENT
    statements = OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd())
    smExpr = Group(preamble + statements + UNDENT)

    smExpr.setFailAction(lambda a, b, c, d: reset_stack())
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
6356
6357
# 8-bit (Latin-1 range) alphabetic and punctuation character sets
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions matching any tag name
anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:").setName('any tag'))

# entity name -> replacement character
_htmlEntityMap = {
    "gt": '>',
    "lt": '<',
    "amp": '&',
    "nbsp": ' ',
    "quot": '"',
    "apos": '\'',
}
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap) + ");"
).setName("common HTML entity")
6363
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters

    Looks up the ``entity`` results name of ``t`` in the entity map and
    returns the mapped character, or ``None`` if the name is not in the map.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
6366
6367
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form ``/* ... */``"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form ``<!-- ... -->``"

restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form ``// ... (to end of line)``"

cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/')
    | dblSlashComment
).setName("C++ style comment")
"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`"

javaStyleComment = cppStyleComment
"Same as :class:`cppStyleComment`"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form ``# ... (to end of line)``"

# one comma-free item: printable words, optionally joined by runs of
# spaces/tabs that are not followed by a comma or the end of the line
_commasepitem = Combine(
    OneOrMore(Word(printables, excludeChars=',')
              + Optional(Word(" \t")
                         + ~Literal(",") + ~LineEnd()))
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional(quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or
quoted strings, separated by commas.

This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`.
"""
6396
6397
# some other useful expressions - using lower-case class name since we are really using this as a namespace
6398
class pyparsing_common:
    """Namespace of common low-level expressions that may be useful in
    jump-starting parser development:

     - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
       :class:`scientific notation<sci_real>`)
     - common :class:`programming identifiers<identifier>`
     - network addresses (:class:`MAC<mac_address>`,
       :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
     - ISO8601 :class:`dates<iso8601_date>` and
       :class:`datetime<iso8601_datetime>`
     - :class:`UUID<uuid>`
     - :class:`comma-separated list<comma_separated_list>`

    Parse actions:

     - :class:`convertToInteger`
     - :class:`convertToFloat`
     - :class:`convertToDate`
     - :class:`convertToDatetime`
     - :class:`stripHTMLTags`
     - :class:`upcaseTokens`
     - :class:`downcaseTokens`

    Example::

        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    prints::

        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = (
        Word(nums)
        .setName("integer")
        .setParseAction(convertToInteger)
    )
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = (
        Word(hexnums)
        .setName("hex integer")
        .setParseAction(tokenMap(int, 16))
    )
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = (
        Regex(r'[+-]?\d+')
        .setName("signed integer")
        .setParseAction(convertToInteger)
    )
    """expression that parses an integer with optional leading sign, returns an int"""

    # numerator and denominator are independent copies of signed_integer, each
    # converting to float so that the division below is a true division
    fraction = (
        signed_integer.copy().setParseAction(convertToFloat)
        + '/'
        + signed_integer.copy().setParseAction(convertToFloat)
    ).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    fraction.addParseAction(lambda toks: toks[0] / toks[-1])

    mixed_integer = (
        fraction
        | signed_integer + Optional(Optional('-').suppress() + fraction)
    ).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the whole part and the fractional part arrive as separate tokens; add them
    mixed_integer.addParseAction(sum)

    real = (
        Regex(r'[+-]?(?:\d+\.\d*|\.\d+)')
        .setName("real number")
        .setParseAction(convertToFloat)
    )
    """expression that parses a floating point number and returns a float"""

    sci_real = (
        Regex(r'[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)')
        .setName("real number with scientific notation")
        .setParseAction(convertToFloat)
    )
    """expression that parses a floating point number with optional
    scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = (
        Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?')
        .setName("fnumber")
        .setParseAction(convertToFloat)
    )
    """any int or real number, returned as float"""

    identifier = Word(alphas + '_', alphanums + '_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}'
    ).setName("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (
        _ipv6_part + (':' + _ipv6_part) * 7
    ).setName("full IPv6 address")
    _short_ipv6_address = (
        Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
        + "::"
        + Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
    ).setName("short IPv6 address")
    # the short form is only accepted when fewer than 8 hex groups were matched
    _short_ipv6_address.addCondition(
        lambda toks: len(
            [tok for tok in toks if pyparsing_common._ipv6_part.matches(tok)]
        ) < 8
    )
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine(
        (
            _full_ipv6_address
            | _mixed_ipv6_address
            | _short_ipv6_address
        ).setName("IPv6 address")
    ).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the back-reference \1 forces every delimiter to equal the first one
    mac_address = Regex(
        r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}'
    ).setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        Example::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))

        prints::

            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(instring, loc, toks):
            try:
                parsed = datetime.strptime(toks[0], fmt)
            except ValueError as err:
                # report a malformed date as an ordinary parse failure
                raise ParseException(instring, loc, str(err))
            return parsed.date()
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        Example::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))

        prints::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(instring, loc, toks):
            try:
                parsed = datetime.strptime(toks[0], fmt)
            except ValueError as err:
                # report a malformed datetime as an ordinary parse failure
                raise ParseException(instring, loc, str(err))
            return parsed
        return cvt_fn

    iso8601_date = Regex(
        r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?'
    ).setName("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``)"

    iso8601_datetime = Regex(
        r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?'
    ).setName("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    uuid = Regex(
        r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}'
    ).setName("UUID")
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    # matches (and discards) any opening or closing HTML tag
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()

    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """Parse action to remove HTML tags from web page HTML source

        Example::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
            print(table_text.parseString(text).body)

        Prints::

            More info at the pyparsing wiki page
        """
        first_token = tokens[0]
        return pyparsing_common._html_stripper.transformString(first_token)

    _commasepitem = Combine(
        OneOrMore(
            ~Literal(",")
            + ~LineEnd()
            + Word(printables, excludeChars=',')
            + Optional(White(" \t"))
        )
    ).streamline().setName("commaItem")
    comma_separated_list = delimitedList(
        Optional(quotedString.copy() | _commasepitem, default='')
    ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda tok: _ustr(tok).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda tok: _ustr(tok).lower()))
    """Parse action to convert tokens to lower case."""
6696
6697
6698
class _lazyclassproperty(object):
6699
def __init__(self, fn):
6700
self.fn = fn
6701
self.__doc__ = fn.__doc__
6702
self.__name__ = fn.__name__
6703
6704
def __get__(self, obj, cls):
6705
if cls is None:
6706
cls = type(obj)
6707
if not hasattr(cls, '_intern') or any(cls._intern is getattr(superclass, '_intern', [])
6708
for superclass in cls.__mro__[1:]):
6709
cls._intern = {}
6710
attrname = self.fn.__name__
6711
if attrname not in cls._intern:
6712
cls._intern[attrname] = self.fn(cls)
6713
return cls._intern[attrname]
6714
6715
6716
class unicode_set(object):
    """
    A set of Unicode characters, for language-specific strings for
    ``alphas``, ``nums``, ``alphanums``, and ``printables``.
    A unicode_set is defined by a list of ranges in the Unicode character
    set, in a class attribute ``_ranges``, such as::

        _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]

    A unicode set can also be defined using multiple inheritance of other unicode sets::

        class CJK(Chinese, Japanese, Korean):
            pass
    """
    _ranges = []

    @classmethod
    def _get_chars_for_ranges(cls):
        """Return the sorted list of characters covered by the ``_ranges`` of
        this class and of every class before ``unicode_set`` in its MRO."""
        codepoints = set()
        for klass in cls.__mro__:
            if klass is unicode_set:
                break
            for bounds in klass._ranges:
                # a 1-tuple names a single code point: first and last coincide
                codepoints.update(range(bounds[0], bounds[-1] + 1))
        return [unichr(cp) for cp in sorted(codepoints)]

    @_lazyclassproperty
    def printables(cls):
        "all non-whitespace characters in this range"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if not ch.isspace())

    @_lazyclassproperty
    def alphas(cls):
        "all alphabetic characters in this range"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if ch.isalpha())

    @_lazyclassproperty
    def nums(cls):
        "all numeric digit characters in this range"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if ch.isdigit())

    @_lazyclassproperty
    def alphanums(cls):
        "all alphanumeric characters in this range"
        letters = cls.alphas
        digits = cls.nums
        return letters + digits
6761
6762
6763
class pyparsing_unicode(unicode_set):
    """
    A namespace class for defining common language unicode_sets.
    """
    # everything from the space character up to the highest code point
    _ranges = [(0x0020, sys.maxunicode)]

    class Latin1(unicode_set):
        "Unicode set for Latin-1 Unicode Character Range"
        _ranges = [
            (0x0020, 0x007e),
            (0x00a0, 0x00ff),
        ]

    class LatinA(unicode_set):
        "Unicode set for Latin-A Unicode Character Range"
        _ranges = [
            (0x0100, 0x017f),
        ]

    class LatinB(unicode_set):
        "Unicode set for Latin-B Unicode Character Range"
        _ranges = [
            (0x0180, 0x024f),
        ]

    class Greek(unicode_set):
        "Unicode set for Greek Unicode Character Ranges"
        _ranges = [
            (0x0370, 0x03ff),
            (0x1f00, 0x1f15),
            (0x1f18, 0x1f1d),
            (0x1f20, 0x1f45),
            (0x1f48, 0x1f4d),
            (0x1f50, 0x1f57),
            (0x1f59,),
            (0x1f5b,),
            (0x1f5d,),
            (0x1f5f, 0x1f7d),
            (0x1f80, 0x1fb4),
            (0x1fb6, 0x1fc4),
            (0x1fc6, 0x1fd3),
            (0x1fd6, 0x1fdb),
            (0x1fdd, 0x1fef),
            (0x1ff2, 0x1ff4),
            (0x1ff6, 0x1ffe),
        ]

    class Cyrillic(unicode_set):
        "Unicode set for Cyrillic Unicode Character Range"
        _ranges = [
            (0x0400, 0x04ff),
        ]

    class Chinese(unicode_set):
        "Unicode set for Chinese Unicode Character Range"
        _ranges = [
            (0x4e00, 0x9fff),
            (0x3000, 0x303f),
        ]

    class Japanese(unicode_set):
        "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
        # left empty here; assigned after the class body from the nested sets
        _ranges = []

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges = [
                (0x4e00, 0x9fbf),
                (0x3000, 0x303f),
            ]

        class Hiragana(unicode_set):
            "Unicode set for Hiragana Unicode Character Range"
            _ranges = [
                (0x3040, 0x309f),
            ]

        class Katakana(unicode_set):
            "Unicode set for Katakana Unicode Character Range"
            _ranges = [
                (0x30a0, 0x30ff),
            ]

    class Korean(unicode_set):
        "Unicode set for Korean Unicode Character Range"
        _ranges = [
            (0xac00, 0xd7af),
            (0x1100, 0x11ff),
            (0x3130, 0x318f),
            (0xa960, 0xa97f),
            (0xd7b0, 0xd7ff),
            (0x3000, 0x303f),
        ]

    class CJK(Chinese, Japanese, Korean):
        "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"

    class Thai(unicode_set):
        "Unicode set for Thai Unicode Character Range"
        _ranges = [
            (0x0e01, 0x0e3a),
            (0x0e3f, 0x0e5b),
        ]

    class Arabic(unicode_set):
        "Unicode set for Arabic Unicode Character Range"
        _ranges = [
            (0x0600, 0x061b),
            (0x061e, 0x06ff),
            (0x0700, 0x077f),
        ]

    class Hebrew(unicode_set):
        "Unicode set for Hebrew Unicode Character Range"
        _ranges = [
            (0x0590, 0x05ff),
        ]

    class Devanagari(unicode_set):
        "Unicode set for Devanagari Unicode Character Range"
        _ranges = [
            (0x0900, 0x097f),
            (0xa8e0, 0xa8ff),
        ]
6836
6837
# Japanese is the concatenation of its three nested script ranges
pyparsing_unicode.Japanese._ranges = sum(
    (
        pyparsing_unicode.Japanese.Kanji._ranges,
        pyparsing_unicode.Japanese.Hiragana._ranges,
        pyparsing_unicode.Japanese.Katakana._ranges,
    ),
    []
)

# define ranges in language character sets
if PY_3:
    # (owner class, native-language alias, unicode_set being aliased);
    # setattr is used because these names cannot be written as identifiers
    # in source that must also compile under Python 2
    for _uset_owner, _uset_alias, _uset_target in (
        (pyparsing_unicode, u"العربية", pyparsing_unicode.Arabic),
        (pyparsing_unicode, u"中文", pyparsing_unicode.Chinese),
        (pyparsing_unicode, u"кириллица", pyparsing_unicode.Cyrillic),
        (pyparsing_unicode, u"Ελληνικά", pyparsing_unicode.Greek),
        (pyparsing_unicode, u"עִברִית", pyparsing_unicode.Hebrew),
        (pyparsing_unicode, u"日本語", pyparsing_unicode.Japanese),
        (pyparsing_unicode.Japanese, u"漢字", pyparsing_unicode.Japanese.Kanji),
        (pyparsing_unicode.Japanese, u"カタカナ", pyparsing_unicode.Japanese.Katakana),
        (pyparsing_unicode.Japanese, u"ひらがな", pyparsing_unicode.Japanese.Hiragana),
        (pyparsing_unicode, u"한국어", pyparsing_unicode.Korean),
        (pyparsing_unicode, u"ไทย", pyparsing_unicode.Thai),
        (pyparsing_unicode, u"देवनागरी", pyparsing_unicode.Devanagari),
    ):
        setattr(_uset_owner, _uset_alias, _uset_target)
    # do not leave the loop temporaries behind in the module namespace
    del _uset_owner, _uset_alias, _uset_target
6855
6856
6857
class pyparsing_test:
    """
    namespace class for classes useful in writing unit tests
    """

    class reset_pyparsing_context:
        """
        Context manager to be used when writing unit tests that modify pyparsing config values:
         - packrat parsing
         - default whitespace characters.
         - default keyword characters
         - literal string auto-conversion class
         - __diag__ settings
         - __compat__ settings

        Example:
            with reset_pyparsing_context():
                # test that literals used to construct a grammar are automatically suppressed
                ParserElement.inlineLiteralsUsing(Suppress)

                term = Word(alphas) | Word(nums)
                group = Group('(' + term[...] + ')')

                # assert that the '()' characters are not included in the parsed tokens
                self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def'])

            # after exiting context manager, literals are converted to Literal expressions again
        """

        def __init__(self):
            # snapshot of the global settings, filled in by save()
            self._save_context = {}

        def save(self):
            """Record the current pyparsing global settings and return ``self``."""
            self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
            self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS
            self._save_context[
                "literal_string_class"
            ] = ParserElement._literalStringClass
            self._save_context["packrat_enabled"] = ParserElement._packratEnabled
            self._save_context["packrat_parse"] = ParserElement._parse
            self._save_context["__diag__"] = {
                name: getattr(__diag__, name) for name in __diag__._all_names
            }
            # stored as a {flag name: value} dict, like the __diag__ snapshot
            self._save_context["__compat__"] = {
                "collect_all_And_tokens": __compat__.collect_all_And_tokens
            }
            return self

        def restore(self):
            """Write the settings recorded by :meth:`save` back to pyparsing's global state."""
            # reset pyparsing global state
            if (
                ParserElement.DEFAULT_WHITE_CHARS
                != self._save_context["default_whitespace"]
            ):
                ParserElement.setDefaultWhitespaceChars(
                    self._save_context["default_whitespace"]
                )
            Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"]
            ParserElement.inlineLiteralsUsing(
                self._save_context["literal_string_class"]
            )
            for name, value in self._save_context["__diag__"].items():
                setattr(__diag__, name, value)
            ParserElement._packratEnabled = self._save_context["packrat_enabled"]
            ParserElement._parse = self._save_context["packrat_parse"]
            # The snapshot is a dict keyed by flag name, so restore each flag
            # individually.  Assigning the dict itself to
            # collect_all_And_tokens would leave the flag set to an
            # always-truthy dict instead of the saved value.
            for name, value in self._save_context["__compat__"].items():
                setattr(__compat__, name, value)

        def __enter__(self):
            return self.save()

        def __exit__(self, *args):
            return self.restore()

    class TestParseResultsAsserts:
        """
        A mixin class to add parse results assertion methods to normal unittest.TestCase classes.
        """
        def assertParseResultsEquals(
            self, result, expected_list=None, expected_dict=None, msg=None
        ):
            """
            Unit test assertion to compare a ParseResults object with an optional expected_list,
            and compare any defined results names with an optional expected_dict.
            """
            if expected_list is not None:
                self.assertEqual(expected_list, result.asList(), msg=msg)
            if expected_dict is not None:
                self.assertEqual(expected_dict, result.asDict(), msg=msg)

        def assertParseAndCheckList(
            self, expr, test_string, expected_list, msg=None, verbose=True
        ):
            """
            Convenience wrapper assert to test a parser element and input string, and assert that
            the resulting ParseResults.asList() is equal to the expected_list.
            """
            result = expr.parseString(test_string, parseAll=True)
            if verbose:
                print(result.dump())
            self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg)

        def assertParseAndCheckDict(
            self, expr, test_string, expected_dict, msg=None, verbose=True
        ):
            """
            Convenience wrapper assert to test a parser element and input string, and assert that
            the resulting ParseResults.asDict() is equal to the expected_dict.
            """
            result = expr.parseString(test_string, parseAll=True)
            if verbose:
                print(result.dump())
            self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg)

        def assertRunTestResults(
            self, run_tests_report, expected_parse_results=None, msg=None
        ):
            """
            Unit test assertion to evaluate output of ParserElement.runTests(). If a list of
            list-dict tuples is given as the expected_parse_results argument, then these are zipped
            with the report tuples returned by runTests and evaluated using assertParseResultsEquals.
            Finally, asserts that the overall runTests() success value is True.

            :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests
            :param expected_parse_results (optional): [tuple(str, list, dict, Exception)]
            """
            run_test_success, run_test_results = run_tests_report

            if expected_parse_results is not None:
                # pair each (test string, result) report with its expectation;
                # zip stops at the shorter of the two sequences
                merged = [
                    (rpt[0], rpt[1], expected)
                    for rpt, expected in zip(run_test_results, expected_parse_results)
                ]
                for test_string, result, expected in merged:
                    # expected should be a tuple containing a list and/or a dict or an exception,
                    # and optional failure message string
                    # an empty tuple will skip any result validation
                    fail_msg = next(
                        (exp for exp in expected if isinstance(exp, str)), None
                    )
                    expected_exception = next(
                        (
                            exp
                            for exp in expected
                            if isinstance(exp, type) and issubclass(exp, Exception)
                        ),
                        None,
                    )
                    if expected_exception is not None:
                        # runTests reports a failure as an exception instance;
                        # re-raise it so assertRaises can check its type
                        with self.assertRaises(
                            expected_exception=expected_exception, msg=fail_msg or msg
                        ):
                            if isinstance(result, Exception):
                                raise result
                    else:
                        expected_list = next(
                            (exp for exp in expected if isinstance(exp, list)), None
                        )
                        expected_dict = next(
                            (exp for exp in expected if isinstance(exp, dict)), None
                        )
                        if (expected_list, expected_dict) != (None, None):
                            self.assertParseResultsEquals(
                                result,
                                expected_list=expected_list,
                                expected_dict=expected_dict,
                                msg=fail_msg or msg,
                            )
                        else:
                            # warning here maybe?
                            print("no validation for {!r}".format(test_string))

            # do this last, in case some specific test results can be reported instead
            self.assertTrue(
                run_test_success, msg=msg if msg is not None else "failed runTests"
            )

        @contextmanager
        def assertRaisesParseException(self, exc_type=ParseException, msg=None):
            """Context manager asserting that the enclosed code raises ``exc_type``."""
            with self.assertRaises(exc_type, msg=msg):
                yield
7036
7037
7038
if __name__ == "__main__":
    # Self-demo: build a tiny SQL SELECT grammar, then exercise runTests on it
    # and on a few of the pyparsing_common expressions.

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into one token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = '*' | columnNameList

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = (
        selectToken("command")
        + columnSpec("columns")
        + fromToken
        + tableNameList("tables")
    )

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # any int or real number, returned as the appropriate type
    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # hex numbers
    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # uuid strings converted to uuid.UUID objects
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
7108
7109