Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hhhrrrttt222111
GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/setuptools/_vendor/pyparsing.py
811 views
1
# module pyparsing.py
2
#
3
# Copyright (c) 2003-2018 Paul T. McGuire
4
#
5
# Permission is hereby granted, free of charge, to any person obtaining
6
# a copy of this software and associated documentation files (the
7
# "Software"), to deal in the Software without restriction, including
8
# without limitation the rights to use, copy, modify, merge, publish,
9
# distribute, sublicense, and/or sell copies of the Software, and to
10
# permit persons to whom the Software is furnished to do so, subject to
11
# the following conditions:
12
#
13
# The above copyright notice and this permission notice shall be
14
# included in all copies or substantial portions of the Software.
15
#
16
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
#
24
25
__doc__ = \
26
"""
27
pyparsing module - Classes and methods to define and execute parsing grammars
28
=============================================================================
29
30
The pyparsing module is an alternative approach to creating and executing simple grammars,
31
vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33
provides a library of classes that you use to construct the grammar directly in Python.
34
35
Here is a program to parse "Hello, World!" (or any greeting of the form
36
C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
37
(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
38
L{Literal} expressions)::
39
40
from pyparsing import Word, alphas
41
42
# define grammar of a greeting
43
greet = Word(alphas) + "," + Word(alphas) + "!"
44
45
hello = "Hello, World!"
46
print (hello, "->", greet.parseString(hello))
47
48
The program outputs the following::
49
50
Hello, World! -> ['Hello', ',', 'World', '!']
51
52
The Python representation of the grammar is quite readable, owing to the self-explanatory
53
class names, and the use of '+', '|' and '^' operators.
54
55
The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
56
object with named attributes.
57
58
The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
59
- extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
60
- quoted strings
61
- embedded comments
62
63
64
Getting Started -
65
-----------------
66
Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
67
classes inherit from. Use the docstrings for examples of how to:
68
- construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
69
- construct character word-group expressions using the L{Word} class
70
- see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
71
- use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
72
- associate names with your parsed results using L{ParserElement.setResultsName}
73
- find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
74
- find more useful common expressions in the L{pyparsing_common} namespace class
75
"""
76
77
__version__ = "2.2.1"
78
__versionTime__ = "18 Sep 2018 00:49 UTC"
79
__author__ = "Paul McGuire <[email protected]>"
80
81
import string
82
from weakref import ref as wkref
83
import copy
84
import sys
85
import warnings
86
import re
87
import sre_constants
88
import collections
89
import pprint
90
import traceback
91
import types
92
from datetime import datetime
93
94
try:
95
from _thread import RLock
96
except ImportError:
97
from threading import RLock
98
99
try:
100
# Python 3
101
from collections.abc import Iterable
102
from collections.abc import MutableMapping
103
except ImportError:
104
# Python 2.7
105
from collections import Iterable
106
from collections import MutableMapping
107
108
try:
109
from collections import OrderedDict as _OrderedDict
110
except ImportError:
111
try:
112
from ordereddict import OrderedDict as _OrderedDict
113
except ImportError:
114
_OrderedDict = None
115
116
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
117
118
# Names exported by ``from pyparsing import *``.  Order is unchanged; the
# entries are only regrouped so that each kind of name is easy to scan.
__all__ = [
    # parser element and exception classes
    'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine',
    'Dict', 'Each', 'Empty', 'FollowedBy', 'Forward', 'GoToColumn', 'Group',
    'Keyword', 'LineEnd', 'LineStart', 'Literal', 'MatchFirst', 'NoMatch',
    'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
    'ParseBaseException', 'ParseElementEnhance', 'ParseException',
    'ParseExpression', 'ParseFatalException', 'ParseResults',
    'ParseSyntaxException', 'ParserElement', 'QuotedString',
    'RecursiveGrammarException', 'Regex', 'SkipTo', 'StringEnd',
    'StringStart', 'Suppress', 'Token', 'TokenConverter', 'White', 'Word',
    'WordEnd', 'WordStart', 'ZeroOrMore',
    # character sets, helper functions and predefined expressions
    'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag',
    'cStyleComment', 'col', 'commaSeparatedList', 'commonHTMLEntity',
    'countedArray', 'cppStyleComment', 'dblQuotedString', 'dblSlashComment',
    'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
    'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart',
    'lineno', 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol',
    'matchPreviousExpr', 'matchPreviousLiteral', 'nestedExpr',
    'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence',
    'printables', 'punc8bit', 'pythonStyleComment', 'quotedString',
    'removeQuotes', 'replaceHTMLEntity', 'replaceWith', 'restOfLine',
    'sglQuotedString', 'srange', 'stringEnd', 'stringStart',
    'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
    'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation',
    'locatedExpr', 'withClass',
    'CloseMatch', 'tokenMap', 'pyparsing_common',
]
138
139
# Interpreter detection: everything below picks Python 2 or Python 3 spellings
# of a handful of names used by this module.
system_version = tuple(sys.version_info[:3])
PY_3 = (system_version[0] == 3)

if not PY_3:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

        Unicode objects are returned unchanged.  Otherwise str(obj) is tried
        first; if that raises UnicodeEncodeError, unicode(obj) is encoded with
        the default encoding using XML character references, and each
        reference is then rewritten as a backslash-u escape.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # When this succeeds the result is exactly what str(obj) gives, so
            # existing callers see no difference.
            return str(obj)
        except UnicodeEncodeError:
            # Fall back to an encoded form with '&#NNN;' references ...
            encoded = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # ... and turn each reference into '\u' followed by the hex code.
            charref = Regex(r'&#\d+;')
            charref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return charref.transformString(encoded)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    import __builtin__
    singleArgBuiltins = []
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        # skip any builtin that this interpreter does not provide
        if hasattr(__builtin__, fname):
            singleArgBuiltins.append(getattr(__builtin__, fname))

else:
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [
        sum, len, sorted, reversed, list, tuple, set, any, all, min, max,
    ]

# Type object of generator expressions, obtained from a throwaway generator.
_generatorType = type(y for y in range(1))
184
185
def _xml_escape(data):
    """Return ``data`` with &, >, <, " and ' replaced by XML entity references."""
    # The ampersand pair has to come first: the entities written by the later
    # replacements contain '&' themselves and must not be escaped again.
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for symbol, entity in replacements:
        data = data.replace(symbol, entity)
    return data
194
195
class _Constants(object):
    """Bare class with no members of its own; instances accept arbitrary attributes."""
197
198
# Character-set strings: each one is simply the concatenation of the
# characters that belong to the set.
nums = string.digits
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
alphanums = alphas + nums
hexnums = nums + "ABCDEF" + "abcdef"
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))
204
205
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        self.loc = loc
        if msg is not None:
            self.msg, self.pstr = msg, pstr
        else:
            # single-string form: the lone argument is the message and there
            # is no input text to report against
            self.msg, self.pstr = pstr, ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        # These values are computed on demand from loc and pstr.
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        details = (self.msg, self.loc, self.lineno, self.column)
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes alongside the regular class members
        return ["lineno", "col", "line"] + dir(type(self))
260
261
class ParseException(ParseBaseException):
    """
    Exception thrown when parse expressions don't match class;
    supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
281
282
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
286
287
class ParseSyntaxException(ParseFatalException):
    """just like L{ParseFatalException}, but thrown internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
       immediately because an unbacktrackable syntax error has been found"""
292
293
#~ class ReparseException(ParseBaseException):
294
#~ """Experimental class - parse actions can raise this exception to cause
295
#~ pyparsing to reparse the input string:
296
#~ - with a modified input string, and/or
297
#~ - with a modified start location
298
#~ Set the values of the ReparseException in the constructor, and raise the
299
#~ exception in a parse action to cause pyparsing to use the new string/location.
300
#~ Setting the values as None causes no change to be made.
301
#~ """
302
#~ def __init_( self, newstring, restartLoc ):
303
#~ self.newParseText = newstring
304
#~ self.reparseLoc = restartLoc
305
306
class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""

    def __init__(self, parseElementList):
        # keep the list that was passed in; it is shown verbatim by __str__
        self.parseElementTrace = parseElementList

    def __str__(self):
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
313
314
class _ParseResultsWithOffset(object):
    """Two-item record kept in ``self.tup``: a value and the offset stored with it."""

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        # index 0 is the value, index 1 the offset; other indexes raise
        # IndexError, which also lets the object be unpacked like a pair
        return self.tup[i]

    def __repr__(self):
        # only the value part is shown
        value = self.tup[0]
        return repr(value)

    def setOffset(self, i):
        """Replace the stored offset with ``i``, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)
323
324
class ParseResults(object):
325
"""
326
Structured parse results, to provide multiple means of access to the parsed data:
327
- as a list (C{len(results)})
328
- by list index (C{results[0], results[1]}, etc.)
329
- by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
330
331
Example::
332
integer = Word(nums)
333
date_str = (integer.setResultsName("year") + '/'
334
+ integer.setResultsName("month") + '/'
335
+ integer.setResultsName("day"))
336
# equivalent form:
337
# date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
338
339
# parseString returns a ParseResults object
340
result = date_str.parseString("1999/12/31")
341
342
def test(s, fn=repr):
343
print("%s -> %s" % (s, fn(eval(s))))
344
test("list(result)")
345
test("result[0]")
346
test("result['month']")
347
test("result.day")
348
test("'month' in result")
349
test("'minutes' in result")
350
test("result.dump()", str)
351
prints::
352
list(result) -> ['1999', '/', '12', '/', '31']
353
result[0] -> '1999'
354
result['month'] -> '12'
355
result.day -> '31'
356
'month' in result -> True
357
'minutes' in result -> False
358
result.dump() -> ['1999', '/', '12', '/', '31']
359
- day: 31
360
- month: 12
361
- year: 1999
362
"""
363
def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
364
if isinstance(toklist, cls):
365
return toklist
366
retobj = object.__new__(cls)
367
retobj.__doinit = True
368
return retobj
369
370
# Performance tuning: we construct a *lot* of these, so keep this
371
# constructor as small and fast as possible
372
def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
373
if self.__doinit:
374
self.__doinit = False
375
self.__name = None
376
self.__parent = None
377
self.__accumNames = {}
378
self.__asList = asList
379
self.__modal = modal
380
if toklist is None:
381
toklist = []
382
if isinstance(toklist, list):
383
self.__toklist = toklist[:]
384
elif isinstance(toklist, _generatorType):
385
self.__toklist = list(toklist)
386
else:
387
self.__toklist = [toklist]
388
self.__tokdict = dict()
389
390
if name is not None and name:
391
if not modal:
392
self.__accumNames[name] = 0
393
if isinstance(name,int):
394
name = _ustr(name) # will always return a str, but use _ustr for consistency
395
self.__name = name
396
if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
397
if isinstance(toklist,basestring):
398
toklist = [ toklist ]
399
if asList:
400
if isinstance(toklist,ParseResults):
401
self[name] = _ParseResultsWithOffset(toklist.copy(),0)
402
else:
403
self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
404
self[name].__name = name
405
else:
406
try:
407
self[name] = toklist[0]
408
except (KeyError,TypeError,IndexError):
409
self[name] = toklist
410
411
def __getitem__( self, i ):
412
if isinstance( i, (int,slice) ):
413
return self.__toklist[i]
414
else:
415
if i not in self.__accumNames:
416
return self.__tokdict[i][-1][0]
417
else:
418
return ParseResults([ v[0] for v in self.__tokdict[i] ])
419
420
def __setitem__( self, k, v, isinstance=isinstance ):
421
if isinstance(v,_ParseResultsWithOffset):
422
self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
423
sub = v[0]
424
elif isinstance(k,(int,slice)):
425
self.__toklist[k] = v
426
sub = v
427
else:
428
self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
429
sub = v
430
if isinstance(sub,ParseResults):
431
sub.__parent = wkref(self)
432
433
def __delitem__( self, i ):
434
if isinstance(i,(int,slice)):
435
mylen = len( self.__toklist )
436
del self.__toklist[i]
437
438
# convert int to slice
439
if isinstance(i, int):
440
if i < 0:
441
i += mylen
442
i = slice(i, i+1)
443
# get removed indices
444
removed = list(range(*i.indices(mylen)))
445
removed.reverse()
446
# fixup indices in token dictionary
447
for name,occurrences in self.__tokdict.items():
448
for j in removed:
449
for k, (value, position) in enumerate(occurrences):
450
occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
451
else:
452
del self.__tokdict[i]
453
454
def __contains__( self, k ):
455
return k in self.__tokdict
456
457
def __len__( self ): return len( self.__toklist )
458
def __bool__(self): return ( not not self.__toklist )
459
__nonzero__ = __bool__
460
def __iter__( self ): return iter( self.__toklist )
461
def __reversed__( self ): return iter( self.__toklist[::-1] )
462
def _iterkeys( self ):
463
if hasattr(self.__tokdict, "iterkeys"):
464
return self.__tokdict.iterkeys()
465
else:
466
return iter(self.__tokdict)
467
468
def _itervalues( self ):
469
return (self[k] for k in self._iterkeys())
470
471
def _iteritems( self ):
472
return ((k, self[k]) for k in self._iterkeys())
473
474
if PY_3:
475
keys = _iterkeys
476
"""Returns an iterator of all named result keys (Python 3.x only)."""
477
478
values = _itervalues
479
"""Returns an iterator of all named result values (Python 3.x only)."""
480
481
items = _iteritems
482
"""Returns an iterator of all named result key-value tuples (Python 3.x only)."""
483
484
else:
485
iterkeys = _iterkeys
486
"""Returns an iterator of all named result keys (Python 2.x only)."""
487
488
itervalues = _itervalues
489
"""Returns an iterator of all named result values (Python 2.x only)."""
490
491
iteritems = _iteritems
492
"""Returns an iterator of all named result key-value tuples (Python 2.x only)."""
493
494
def keys( self ):
495
"""Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
496
return list(self.iterkeys())
497
498
def values( self ):
499
"""Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
500
return list(self.itervalues())
501
502
def items( self ):
503
"""Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
504
return list(self.iteritems())
505
506
def haskeys( self ):
507
"""Since keys() returns an iterator, this method is helpful in bypassing
508
code that looks for the existence of any defined results names."""
509
return bool(self.__tokdict)
510
511
def pop( self, *args, **kwargs):
512
"""
513
Removes and returns item at specified index (default=C{last}).
514
Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
515
argument or an integer argument, it will use C{list} semantics
516
and pop tokens from the list of parsed tokens. If passed a
517
non-integer argument (most likely a string), it will use C{dict}
518
semantics and pop the corresponding value from any defined
519
results names. A second default return value argument is
520
supported, just as in C{dict.pop()}.
521
522
Example::
523
def remove_first(tokens):
524
tokens.pop(0)
525
print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
526
print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
527
528
label = Word(alphas)
529
patt = label("LABEL") + OneOrMore(Word(nums))
530
print(patt.parseString("AAB 123 321").dump())
531
532
# Use pop() in a parse action to remove named result (note that corresponding value is not
533
# removed from list form of results)
534
def remove_LABEL(tokens):
535
tokens.pop("LABEL")
536
return tokens
537
patt.addParseAction(remove_LABEL)
538
print(patt.parseString("AAB 123 321").dump())
539
prints::
540
['AAB', '123', '321']
541
- LABEL: AAB
542
543
['AAB', '123', '321']
544
"""
545
if not args:
546
args = [-1]
547
for k,v in kwargs.items():
548
if k == 'default':
549
args = (args[0], v)
550
else:
551
raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
552
if (isinstance(args[0], int) or
553
len(args) == 1 or
554
args[0] in self):
555
index = args[0]
556
ret = self[index]
557
del self[index]
558
return ret
559
else:
560
defaultvalue = args[1]
561
return defaultvalue
562
563
def get(self, key, defaultValue=None):
564
"""
565
Returns named result matching the given key, or if there is no
566
such name, then returns the given C{defaultValue} or C{None} if no
567
C{defaultValue} is specified.
568
569
Similar to C{dict.get()}.
570
571
Example::
572
integer = Word(nums)
573
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
574
575
result = date_str.parseString("1999/12/31")
576
print(result.get("year")) # -> '1999'
577
print(result.get("hour", "not specified")) # -> 'not specified'
578
print(result.get("hour")) # -> None
579
"""
580
if key in self:
581
return self[key]
582
else:
583
return defaultValue
584
585
def insert( self, index, insStr ):
586
"""
587
Inserts new element at location index in the list of parsed tokens.
588
589
Similar to C{list.insert()}.
590
591
Example::
592
print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
593
594
# use a parse action to insert the parse location in the front of the parsed results
595
def insert_locn(locn, tokens):
596
tokens.insert(0, locn)
597
print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
598
"""
599
self.__toklist.insert(index, insStr)
600
# fixup indices in token dictionary
601
for name,occurrences in self.__tokdict.items():
602
for k, (value, position) in enumerate(occurrences):
603
occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
604
605
def append( self, item ):
606
"""
607
Add single element to end of ParseResults list of elements.
608
609
Example::
610
print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
611
612
# use a parse action to compute the sum of the parsed integers, and add it to the end
613
def append_sum(tokens):
614
tokens.append(sum(map(int, tokens)))
615
print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
616
"""
617
self.__toklist.append(item)
618
619
def extend( self, itemseq ):
620
"""
621
Add sequence of elements to end of ParseResults list of elements.
622
623
Example::
624
patt = OneOrMore(Word(alphas))
625
626
# use a parse action to append the reverse of the matched strings, to make a palindrome
627
def make_palindrome(tokens):
628
tokens.extend(reversed([t[::-1] for t in tokens]))
629
return ''.join(tokens)
630
print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
631
"""
632
if isinstance(itemseq, ParseResults):
633
self += itemseq
634
else:
635
self.__toklist.extend(itemseq)
636
637
def clear( self ):
638
"""
639
Clear all elements and results names.
640
"""
641
del self.__toklist[:]
642
self.__tokdict.clear()
643
644
def __getattr__( self, name ):
645
try:
646
return self[name]
647
except KeyError:
648
return ""
649
650
if name in self.__tokdict:
651
if name not in self.__accumNames:
652
return self.__tokdict[name][-1][0]
653
else:
654
return ParseResults([ v[0] for v in self.__tokdict[name] ])
655
else:
656
return ""
657
658
def __add__( self, other ):
659
ret = self.copy()
660
ret += other
661
return ret
662
663
def __iadd__( self, other ):
664
if other.__tokdict:
665
offset = len(self.__toklist)
666
addoffset = lambda a: offset if a<0 else a+offset
667
otheritems = other.__tokdict.items()
668
otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
669
for (k,vlist) in otheritems for v in vlist]
670
for k,v in otherdictitems:
671
self[k] = v
672
if isinstance(v[0],ParseResults):
673
v[0].__parent = wkref(self)
674
675
self.__toklist += other.__toklist
676
self.__accumNames.update( other.__accumNames )
677
return self
678
679
def __radd__(self, other):
680
if isinstance(other,int) and other == 0:
681
# useful for merging many ParseResults using sum() builtin
682
return self.copy()
683
else:
684
# this may raise a TypeError - so be it
685
return other + self
686
687
def __repr__( self ):
688
return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
689
690
def __str__( self ):
691
return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
692
693
def _asStringList( self, sep='' ):
694
out = []
695
for item in self.__toklist:
696
if out and sep:
697
out.append(sep)
698
if isinstance( item, ParseResults ):
699
out += item._asStringList()
700
else:
701
out.append( _ustr(item) )
702
return out
703
704
def asList( self ):
705
"""
706
Returns the parse results as a nested list of matching tokens, all converted to strings.
707
708
Example::
709
patt = OneOrMore(Word(alphas))
710
result = patt.parseString("sldkj lsdkj sldkj")
711
# even though the result prints in string-like form, it is actually a pyparsing ParseResults
712
print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
713
714
# Use asList() to create an actual list
715
result_list = result.asList()
716
print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
717
"""
718
return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
719
720
def asDict( self ):
721
"""
722
Returns the named parse results as a nested dictionary.
723
724
Example::
725
integer = Word(nums)
726
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
727
728
result = date_str.parseString('12/31/1999')
729
print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
730
731
result_dict = result.asDict()
732
print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
733
734
# even though a ParseResults supports dict-like access, sometime you just need to have a dict
735
import json
736
print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
737
print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
738
"""
739
if PY_3:
740
item_fn = self.items
741
else:
742
item_fn = self.iteritems
743
744
def toItem(obj):
745
if isinstance(obj, ParseResults):
746
if obj.haskeys():
747
return obj.asDict()
748
else:
749
return [toItem(v) for v in obj]
750
else:
751
return obj
752
753
return dict((k,toItem(v)) for k,v in item_fn())
754
755
def copy( self ):
756
"""
757
Returns a new copy of a C{ParseResults} object.
758
"""
759
ret = ParseResults( self.__toklist )
760
ret.__tokdict = self.__tokdict.copy()
761
ret.__parent = self.__parent
762
ret.__accumNames.update( self.__accumNames )
763
ret.__name = self.__name
764
return ret
765
766
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
767
"""
768
(Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
769
"""
770
nl = "\n"
771
out = []
772
namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
773
for v in vlist)
774
nextLevelIndent = indent + " "
775
776
# collapse out indents if formatting is not desired
777
if not formatted:
778
indent = ""
779
nextLevelIndent = ""
780
nl = ""
781
782
selfTag = None
783
if doctag is not None:
784
selfTag = doctag
785
else:
786
if self.__name:
787
selfTag = self.__name
788
789
if not selfTag:
790
if namedItemsOnly:
791
return ""
792
else:
793
selfTag = "ITEM"
794
795
out += [ nl, indent, "<", selfTag, ">" ]
796
797
for i,res in enumerate(self.__toklist):
798
if isinstance(res,ParseResults):
799
if i in namedItems:
800
out += [ res.asXML(namedItems[i],
801
namedItemsOnly and doctag is None,
802
nextLevelIndent,
803
formatted)]
804
else:
805
out += [ res.asXML(None,
806
namedItemsOnly and doctag is None,
807
nextLevelIndent,
808
formatted)]
809
else:
810
# individual token, see if there is a name for it
811
resTag = None
812
if i in namedItems:
813
resTag = namedItems[i]
814
if not resTag:
815
if namedItemsOnly:
816
continue
817
else:
818
resTag = "ITEM"
819
xmlBodyText = _xml_escape(_ustr(res))
820
out += [ nl, nextLevelIndent, "<", resTag, ">",
821
xmlBodyText,
822
"</", resTag, ">" ]
823
824
out += [ nl, indent, "</", selfTag, ">" ]
825
return "".join(out)
826
827
def __lookup(self,sub):
828
for k,vlist in self.__tokdict.items():
829
for v,loc in vlist:
830
if sub is v:
831
return k
832
return None
833
834
def getName(self):
835
r"""
836
Returns the results name for this token expression. Useful when several
837
different expressions might match at a particular location.
838
839
Example::
840
integer = Word(nums)
841
ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
842
house_number_expr = Suppress('#') + Word(nums, alphanums)
843
user_data = (Group(house_number_expr)("house_number")
844
| Group(ssn_expr)("ssn")
845
| Group(integer)("age"))
846
user_info = OneOrMore(user_data)
847
848
result = user_info.parseString("22 111-22-3333 #221B")
849
for item in result:
850
print(item.getName(), ':', item[0])
851
prints::
852
age : 22
853
ssn : 111-22-3333
854
house_number : 221B
855
"""
856
if self.__name:
857
return self.__name
858
elif self.__parent:
859
par = self.__parent()
860
if par:
861
return par.__lookup(self)
862
else:
863
return None
864
elif (len(self) == 1 and
865
len(self.__tokdict) == 1 and
866
next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
867
return next(iter(self.__tokdict.keys()))
868
else:
869
return None
870
871
def dump(self, indent='', depth=0, full=True):
872
"""
873
Diagnostic method for listing out the contents of a C{ParseResults}.
874
Accepts an optional C{indent} argument so that this string can be embedded
875
in a nested display of other data.
876
877
Example::
878
integer = Word(nums)
879
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
880
881
result = date_str.parseString('12/31/1999')
882
print(result.dump())
883
prints::
884
['12', '/', '31', '/', '1999']
885
- day: 1999
886
- month: 31
887
- year: 12
888
"""
889
out = []
890
NL = '\n'
891
out.append( indent+_ustr(self.asList()) )
892
if full:
893
if self.haskeys():
894
items = sorted((str(k), v) for k,v in self.items())
895
for k,v in items:
896
if out:
897
out.append(NL)
898
out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
899
if isinstance(v,ParseResults):
900
if v:
901
out.append( v.dump(indent,depth+1) )
902
else:
903
out.append(_ustr(v))
904
else:
905
out.append(repr(v))
906
elif any(isinstance(vv,ParseResults) for vv in self):
907
v = self
908
for i,vv in enumerate(v):
909
if isinstance(vv,ParseResults):
910
out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
911
else:
912
out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))
913
914
return "".join(out)
915
916
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results as a nested list, using the C{pprint} module.

    Any additional positional or keyword arguments are forwarded unchanged to
    C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    as_list = self.asList()
    pprint.pprint(as_list, *args, **kwargs)
938
939
# add support for pickle protocol
940
def __getstate__(self):
    """
    Return the pickle state as C{(toklist, (tokdict copy, parent, accumNames, name))}.

    The parent is stored as the dereferenced object (or C{None}) rather than
    as the weak reference held on the instance.
    """
    parent = None
    if self.__parent is not None:
        # a dead or falsy referent is stored as None
        parent = self.__parent() or None
    details = (self.__tokdict.copy(),
               parent,
               self.__accumNames,
               self.__name)
    return (self.__toklist, details)
946
947
def __setstate__(self, state):
    """
    Restore the instance from the tuple produced by C{__getstate__}.

    C{state} is C{(toklist, (tokdict, parent, accumNames, name))}; a non-None
    parent is re-wrapped in a weak reference.
    """
    toklist, details = state[0], state[1]
    tokdict, parent, saved_accum_names, name = details

    self.__toklist = toklist
    self.__tokdict = tokdict
    self.__name = name

    # always start from a fresh dict, then merge in the saved names
    self.__accumNames = {}
    self.__accumNames.update(saved_accum_names)

    self.__parent = wkref(parent) if parent is not None else None
959
960
def __getnewargs__(self):
    """Return the positional arguments passed to C{__new__} when unpickling."""
    new_args = (self.__toklist, self.__name, self.__asList, self.__modal)
    return new_args
962
963
def __dir__(self):
    """List the attributes of the type followed by the defined results names."""
    names = dir(type(self))
    names.extend(self.keys())
    return names
965
966
MutableMapping.register(ParseResults)
967
968
def col(loc, strg):
    """Return the column of C{loc} within C{strg}, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>}
    for more information on parsing strings containing C{<TAB>}s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    text = strg
    # a location just past a newline (but not at the very end) is column 1
    if 0 < loc < len(text) and text[loc - 1] == '\n':
        return 1
    # otherwise measure the distance from the preceding newline (-1 if none)
    return loc - text.rfind("\n", 0, loc)
980
981
def lineno(loc, strg):
    """Return the line number of C{loc} within C{strg}, counting newlines as line separators.
    The first line is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>}
    for more information on parsing strings containing C{<TAB>}s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
992
993
def line(loc, strg):
    """Return the line of text containing C{loc} within C{strg}, counting newlines as line separators.
    """
    # first character after the previous newline (0 when there is none)
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
1002
1003
def _defaultStartDebugAction(instring, loc, expr):
    """Print the expression about to be matched and its location as (line,col)."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    print("Match " + expr_text + " at loc " + loc_text + position)
1005
1006
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Print the expression that matched and the tokens it produced."""
    expr_text = _ustr(expr)
    tokens_text = str(toks.asList())
    print("Matched " + expr_text + " -> " + tokens_text)
1008
1009
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Print the exception raised while trying to match an expression."""
    message = "Exception raised:" + _ustr(exc)
    print(message)
1011
1012
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments and ignores them.
    """
    return None
1015
1016
# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
1017
#~ 'decorator to trim function calls to match the arity of the target'
1018
#~ def _trim_arity(func, maxargs=3):
1019
#~ if func in singleArgBuiltins:
1020
#~ return lambda s,l,t: func(t)
1021
#~ limit = 0
1022
#~ foundArity = False
1023
#~ def wrapper(*args):
1024
#~ nonlocal limit,foundArity
1025
#~ while 1:
1026
#~ try:
1027
#~ ret = func(*args[limit:])
1028
#~ foundArity = True
1029
#~ return ret
1030
#~ except TypeError:
1031
#~ if limit == maxargs or foundArity:
1032
#~ raise
1033
#~ limit += 1
1034
#~ continue
1035
#~ return wrapper
1036
1037
# this version is Python 2.x-3.x cross-compatible
1038
'decorator to trim function calls to match the arity of the target'
1039
def _trim_arity(func, maxargs=2):
    """Wrap C{func} so that it can be called with the full C{(s, loc, toks)}
    argument list even if it accepts fewer arguments.

    The returned wrapper first calls C{func} with all arguments; each time the
    call raises a C{TypeError} attributed to the call itself, one more leading
    argument is dropped and the call is retried.  Once a call succeeds the
    number of dropped arguments is kept for all later calls.

    Parameters:
     - func - the callable to wrap; members of C{singleArgBuiltins} are
       wrapped so that they are called with just the tokens
     - maxargs - the retry loop stops dropping arguments once more than
       C{maxargs} leading arguments have been dropped

    Returns the wrapper, whose C{__name__} is copied from C{func} where possible.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists serve as mutable cells shared with the nested wrapper
    # (this version avoids 'nonlocal' to stay Python 2 compatible)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # the error counts as an arity mismatch only if the innermost
                        # inspected frame is the call to func above (file, line)
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the traceback reference explicitly
                        del tb

                # retry with one fewer leading argument, until the limit is exhausted
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1102
1103
class ParserElement(object):
1104
"""Abstract base level parser element class."""
1105
DEFAULT_WHITE_CHARS = " \n\t\r"
1106
verbose_stacktrace = False
1107
1108
@staticmethod
def setDefaultWhitespaceChars(chars):
    r"""
    Replace the class-wide default set of whitespace characters.

    The new value is stored on C{ParserElement.DEFAULT_WHITE_CHARS}, which
    C{__init__} and C{copy} read when setting an element's C{whiteChars}.

    Example::
        # default whitespace chars are space, <TAB> and newline
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars
1122
1123
@staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class used to wrap bare strings that are combined with a parser
    element (for example by the C{+} operator).

    Example::
        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    ParserElement._literalStringClass = cls
1143
1144
def __init__(self, savelist=False):
    """Initialize the state shared by all parser elements.

    Parameters:
     - savelist - stored as C{saveAsList}, which is later passed as the
       C{asList} argument when results are built
    """
    # user callbacks
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False

    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.errmsg = ""

    # results handling
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True  # used to mark results names as modal (report only last) or cumulative (list all)

    # whitespace and ignorable expressions
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True  # used to avoid redundant calls to preParse

    # parsing hints
    self.mayReturnEmpty = False  # used when checking for left-recursion
    self.mayIndexError = True  # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.re = None

    # debugging
    self.debug = False
    self.debugActions = (None, None, None)  # custom debug actions
1166
1167
def copy(self):
    """
    Return a shallow copy of this C{ParserElement} with its own parse-action
    and ignore-expression lists.  Useful for defining different parse actions
    for the same parsing pattern.

    If C{copyDefaultWhiteChars} is set, the copy's whitespace characters are
    reset to the current C{ParserElement.DEFAULT_WHITE_CHARS}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    # give the copy independent lists so later additions do not affect the original
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
1189
1190
def setName(self, name):
    """
    Give this expression a name, which makes debugging and exception messages clearer.

    Also sets C{errmsg} to C{"Expected " + name} and, if the element has an
    C{exception} attribute, updates that exception's C{msg}.  Returns C{self}.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    self.errmsg = "Expected " + self.name
    has_cached_exception = hasattr(self, "exception")
    if has_cached_exception:
        self.exception.msg = self.errmsg
    return self
1203
1204
def setResultsName(self, name, listAllMatches=False):
    """
    Define a name for referencing matching tokens as a nested attribute
    of the returned parse results.

    NOTE: this returns a *copy* of the original C{ParserElement} object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    A trailing C{"*"} on C{name} is stripped and has the same effect as
    passing C{listAllMatches=True}.

    You can also set results names using the abbreviated syntax,
    C{expr("name")} in place of C{expr.setResultsName("name")} -
    see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    named_copy = self.copy()
    accumulate = listAllMatches
    if name.endswith("*"):
        name = name[:-1]
        accumulate = True
    named_copy.resultsName = name
    named_copy.modalResults = not accumulate
    return named_copy
1231
1232
def setBreak(self, breakFlag=True):
    """Invoke the Python pdb debugger when this element is about to be parsed.

    Set C{breakFlag} to True to enable, False to disable.  Enabling replaces
    C{self._parse} with a wrapper that calls C{pdb.set_trace()} first; disabling
    restores the wrapped method if such a wrapper is installed.  Returns C{self}.
    """
    if not breakFlag:
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped_parse = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped_parse(instring, loc, doActions, callPreParse)

    # remember the wrapped method so it can be restored later
    breaker._originalParseMethod = wrapped_parse
    self._parse = breaker
    return self
1249
1250
def setParseAction(self, *fns, **kwargs):
    """
    Define one or more actions to perform when successfully matching parse element definition.
    Any previously set parse actions are replaced.  Returns C{self}.

    Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # normalize every action so it can be called with (s, loc, toks)
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1287
1288
def addParseAction(self, *fns, **kwargs):
    """
    Append one or more parse actions to this expression's existing list of
    parse actions. See L{I{setParseAction}<setParseAction>}.

    C{callDuringTry} stays enabled if it already was, or is enabled by the
    C{callDuringTry} keyword argument.  Returns C{self}.

    See examples in L{I{copy}<copy>}.
    """
    self.parseAction += [_trim_arity(fn) for fn in fns]
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1297
1298
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Each function in C{fns} becomes its own parse action, so when several
    conditions are given every one of them is checked.  Returns C{self}.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if True, the conditions are also run during lookaheads and alternate testing

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(condition):
        # Bind this particular condition now.  Defining 'pa' directly in the
        # loop would close over the loop variable, so every action would end
        # up testing only the last condition.
        trimmed = _trim_arity(condition)

        def pa(s, l, t):
            if not bool(trimmed(s, l, t)):
                raise exc_type(s, l, msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition_action(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1324
1325
def setFailAction(self, fn):
    """Define the action to perform if parsing fails at this expression.

    Fail action fn is a callable function that takes the arguments
    C{fn(s,loc,expr,err)} where:
     - s = string being parsed
     - loc = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err = the exception thrown
    The function returns no value.  It may throw C{L{ParseFatalException}}
    if it is desired to stop parsing immediately.

    Returns C{self}."""
    self.failAction = fn
    return self
1337
1338
def _skipIgnorables(self, instring, loc):
    """Advance C{loc} past every match of the expressions in C{ignoreExprs}.

    The ignore expressions are tried repeatedly, in order, until a complete
    pass over them finds no further match.  Returns the new location.
    """
    matched_any = True
    while matched_any:
        matched_any = False
        for ignorable in self.ignoreExprs:
            try:
                # consume consecutive matches of this expression
                while True:
                    loc, _tokens = ignorable._parse(instring, loc)
                    matched_any = True
            except ParseException:
                pass
    return loc
1350
1351
def preParse(self, instring, loc):
    """Return C{loc} advanced past ignorable expressions (if any are defined)
    and then past leading whitespace (if C{skipWhitespace} is set)."""
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    white = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in white:
        loc += 1
    return loc
1362
1363
def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: match nothing, returning C{loc} unchanged and an
    empty token list."""
    no_tokens = []
    return loc, no_tokens
1365
1366
def postParse(self, instring, loc, tokenlist):
    """Base implementation: return the matched tokens unchanged."""
    result = tokenlist
    return result
1368
1369
#~ @profile
1370
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at C{loc}, without consulting the packrat cache.

    Steps: optional pre-parse (skipping ignorables/whitespace), C{parseImpl},
    C{postParse}, wrapping of the tokens in a C{ParseResults}, then the parse
    actions.  Debug and fail actions are invoked along the way when set.

    Parameters:
     - instring - the string being parsed
     - loc - location at which to attempt the match
     - doActions - if false, parse actions are only run when C{callDuringTry} is set
     - callPreParse - if false, C{preParse} is skipped and matching starts at C{loc}

    Returns C{(loc, retTokens)}: the location after the match and the results.
    An C{IndexError} raised by C{parseImpl} is converted to a C{ParseException}
    located at the end of the string.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slow path: debug/fail hooks must see the failure, so all parsing is wrapped
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no hooks to notify
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only guard against IndexError when the element may raise it, or when
        # already at/past the end of the input
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a non-None return value replaces the current results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a non-None return value replaces the current results
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )
    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1440
1441
def tryParse(self, instring, loc):
    """Attempt a match at C{loc} with C{doActions=False} and return only the
    end location.

    A C{ParseFatalException} is converted into an ordinary C{ParseException}
    at C{loc}.
    """
    try:
        outcome = self._parse(instring, loc, doActions=False)
        return outcome[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
1446
1447
def canParseNext(self, instring, loc):
    """Return True if this element matches at C{loc}, False if the attempt
    raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
1454
1455
class _UnboundedCache(object):
    """Packrat cache backed by a plain dict, with no size limit.

    The dict lives in a closure; C{get}, C{set}, C{clear} and C{__len__} are
    attached to each instance as bound methods.  C{get} returns the
    C{not_in_cache} sentinel for a missing key.
    """
    def __init__(self):
        store = {}
        not_in_cache = object()
        self.not_in_cache = not_in_cache

        def get(self, key):
            return store.get(key, not_in_cache)

        def set(self, key, value):
            store[key] = value

        def clear(self):
            store.clear()

        def cache_len(self):
            return len(store)

        bindings = (("get", get), ("set", set), ("clear", clear), ("__len__", cache_len))
        for attr_name, func in bindings:
            setattr(self, attr_name, types.MethodType(func, self))
1476
1477
if _OrderedDict is not None:
1478
class _FifoCache(object):
    """Packrat cache limited to C{size} entries, backed by an ordered dict.

    When an insertion pushes the number of entries above C{size}, the oldest
    entries are removed first.  C{get} returns the C{not_in_cache} sentinel
    for a missing key.
    """
    def __init__(self, size):
        not_in_cache = object()
        self.not_in_cache = not_in_cache

        store = _OrderedDict()

        def get(self, key):
            return store.get(key, not_in_cache)

        def set(self, key, value):
            store[key] = value
            # evict from the front (oldest first) until within the limit
            while len(store) > size:
                try:
                    store.popitem(False)
                except KeyError:
                    pass

        def clear(self):
            store.clear()

        def cache_len(self):
            return len(store)

        bindings = (("get", get), ("set", set), ("clear", clear), ("__len__", cache_len))
        for attr_name, func in bindings:
            setattr(self, attr_name, types.MethodType(func, self))
1505
1506
else:
1507
class _FifoCache(object):
    """Packrat cache limited to C{size} entries, for use when no ordered dict
    is available.

    A plain dict holds the values and a deque records insertion order.  When
    an insertion pushes the number of entries above C{size}, the oldest keys
    are evicted from both.  C{get} returns the C{not_in_cache} sentinel for a
    missing key.  A C{size} of 0 (or less) keeps nothing.
    """
    def __init__(self, size):
        self.not_in_cache = not_in_cache = object()

        cache = {}
        # The deque is deliberately created without maxlen: a bounded deque
        # silently discards its oldest key on append, which would leave the
        # matching entry stranded in 'cache' and let the dict grow forever.
        key_fifo = collections.deque()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            # record each key once, so the deque and the dict stay in step
            if key not in cache:
                key_fifo.append(key)
            cache[key] = value
            while key_fifo and len(key_fifo) > size:
                cache.pop(key_fifo.popleft(), None)

        def clear(self):
            cache.clear()
            key_fifo.clear()

        def cache_len(self):
            return len(cache)

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)
        self.__len__ = types.MethodType(cache_len, self)
1534
1535
# argument cache for optimizing repeated calls when backtracking through recursive expressions
1536
packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail
1537
packrat_cache_lock = RLock()
1538
packrat_cache_stats = [0, 0]
1539
1540
# this method gets repeatedly called during backtracking with the same arguments -
1541
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
1542
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Packrat (memoizing) front end to C{_parseNoCache}.

    Results are cached under the key C{(self, instring, loc, callPreParse, doActions)}.
    Both successful results and parse exceptions are cached; a cached exception
    is re-raised on a hit, and cached results are handed out as copies.
    Hit/miss counts are kept in C{ParserElement.packrat_cache_stats}.
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(lookup)

        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(lookup, pe.__class__(*pe.args))
            raise
        # store a copy so later changes to the returned results do not alter the cache
        cache.set(lookup, (result[0], result[1].copy()))
        return result
1564
1565
_parse = _parseNoCache
1566
1567
@staticmethod
def resetCache():
    """Empty the packrat cache and reset its hit/miss counters to zero."""
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # assign through a slice so the same list object keeps being used
    stats[:] = [0] * len(stats)
1571
1572
_packratEnabled = False
1573
@staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Memoizing is done of
    both valid results and parsing exceptions.

    Calling this again after packrat parsing has been enabled has no effect.

    Parameters:
     - cache_size_limit - (default=C{128}) - if an integer value is provided
       will limit the size of the packrat cache; if None is passed, then
       the cache size will be unbounded; if 0 is passed, the cache will
       be effectively disabled.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call C{ParserElement.enablePackrat()}.  If
    your program uses C{psyco} to "compile as you go", you must call
    C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
    Python will crash.  For best results, call C{enablePackrat()} immediately
    after importing pyparsing.

    Example::
        import pyparsing
        pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        new_cache = ParserElement._UnboundedCache()
    else:
        new_cache = ParserElement._FifoCache(cache_size_limit)
    ParserElement.packrat_cache = new_cache
    # route all parsing through the memoizing implementation
    ParserElement._parse = ParserElement._parseCache
1607
1608
def parseString(self, instring, parseAll=False):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignored in self.ignoreExprs:
        ignored.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            # require that only ignorable text remains after the match
            loc = self.preParse(instring, loc)
            end_of_text = Empty() + StringEnd()
            end_of_text._parse(instring, loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
1657
1658
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator: it yields C{(tokens, start, end)} tuples.  The packrat
    cache is reset before scanning starts.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character further on
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lowercase) is a different variable
                        # from 'nextLoc'; when preParse advances past loc, scanning
                        # resumes at the END of the match - confirm this is intended
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc: step forward to avoid looping forever
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1728
1729
def transformString(self, instring):
    """
    Extension to C{L{scanString}}, to modify matching text with modified tokens that may
    be returned from a parse action.  To use C{transformString}, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  C{transformString()} returns the resulting transformed string.

    Note: this sets C{keepTabs} to True on the element, and the setting is
    left in place afterwards.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    pieces = []
    prev_end = 0
    try:
        for toks, start, end in self.scanString(instring):
            # untouched text between the previous match and this one
            pieces.append(instring[prev_end:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        kept = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(kept)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1771
1772
def searchString(self, instring, maxMatches=_MAX_INT):
    """
    Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression.  May be called with optional
    C{maxMatches} argument, to clip searching after 'n' matches are found.

    Returns a C{ParseResults} containing the tokens of each match; the match
    locations are discarded.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
    prints::
        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        found = [toks for toks, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1798
1799
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.

    Parameters:
     - instring - the string to be split
     - maxsplit - (default=C{_MAX_INT}) passed to C{L{scanString}} as C{maxMatches},
       limiting the number of separators that are matched
     - includeSeparators - (default=C{False}) if true, the first token of each
       separator match is yielded between the surrounding pieces of text

    Yields the text before each separator match, then the remaining text after
    the last match.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # end location of the previous separator match; text from here up to the
    # start of the next match is the next piece to yield
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    yield instring[last:]
1820
1821
def __add__(self, other ):
    """
    Implementation of the + operator - returns an C{L{And}} of this element
    followed by C{other}. A string operand is first converted using
    C{ParserElement._literalStringClass}. If C{other} is neither a string nor
    a C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = ParserElement._literalStringClass( rhs )
    if isinstance( rhs, ParserElement ):
        return And( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1840
1841
def __radd__(self, other ):
    """
    Implementation of the + operator when the left operand is not a
    C{L{ParserElement}}. A string operand is converted using
    C{ParserElement._literalStringClass} and then added to this element;
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a C{None} result.
    """
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = ParserElement._literalStringClass( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1852
1853
def __sub__(self, other):
    """
    Implementation of the - operator - returns an C{L{And}} built from this
    element, an C{And._ErrorStop()} marker, and C{other}. A string operand is
    converted using C{ParserElement._literalStringClass}; any other
    non-C{ParserElement} operand produces a C{SyntaxWarning} and a C{None} result.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = ParserElement._literalStringClass( rhs )
    if isinstance( rhs, ParserElement ):
        return self + And._ErrorStop() + rhs
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1864
1865
def __rsub__(self, other ):
    """
    Implementation of the - operator when the left operand is not a
    C{L{ParserElement}}. A string operand is converted using
    C{ParserElement._literalStringClass} and this element is then subtracted
    from it; any other non-C{ParserElement} operand produces a
    C{SyntaxWarning} and a C{None} result.
    """
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = ParserElement._literalStringClass( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1876
1877
def __mul__(self,other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises:
     - C{TypeError} if C{other} is neither an int nor a tuple, or is a tuple
       whose members are not ints/C{None} in a supported combination
     - C{ValueError} if the minimum is negative, the maximum is smaller than
       the minimum, or the multiplier is 0 or (0,0)
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None so that (n,) behaves like (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements is the count of optional repetitions
            optElements -= minElements
        else:
            # interpolate the operand types into the message (they were
            # previously passed as extra exception args and never formatted)
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # builds Optional(self + Optional(self + ...)) nested n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
1944
1945
def __rmul__(self, other):
    """
    Implementation of * operator when the left operand is not a
    C{ParserElement}; delegates to C{__mul__} with the same operand.
    """
    product = self.__mul__(other)
    return product
1947
1948
def __or__(self, other ):
    """
    Implementation of the | operator - returns a C{L{MatchFirst}} of this
    element and C{other}. A string operand is converted using
    C{ParserElement._literalStringClass}; any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = ParserElement._literalStringClass( rhs )
    if isinstance( rhs, ParserElement ):
        return MatchFirst( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1959
1960
def __ror__(self, other ):
    """
    Implementation of the | operator when the left operand is not a
    C{L{ParserElement}}. A string operand is converted using
    C{ParserElement._literalStringClass} and then or-ed with this element;
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a C{None} result.
    """
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = ParserElement._literalStringClass( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1971
1972
def __xor__(self, other ):
    """
    Implementation of the ^ operator - returns an C{L{Or}} of this element
    and C{other}. A string operand is converted using
    C{ParserElement._literalStringClass}; any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = ParserElement._literalStringClass( rhs )
    if isinstance( rhs, ParserElement ):
        return Or( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1983
1984
def __rxor__(self, other ):
    """
    Implementation of the ^ operator when the left operand is not a
    C{L{ParserElement}}. A string operand is converted using
    C{ParserElement._literalStringClass} and then xor-ed with this element;
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a C{None} result.
    """
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = ParserElement._literalStringClass( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1995
1996
def __and__(self, other ):
    """
    Implementation of the & operator - returns an C{L{Each}} of this element
    and C{other}. A string operand is converted using
    C{ParserElement._literalStringClass}; any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = ParserElement._literalStringClass( rhs )
    if isinstance( rhs, ParserElement ):
        return Each( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
2007
2008
def __rand__(self, other ):
    """
    Implementation of the & operator when the left operand is not a
    C{L{ParserElement}}. A string operand is converted using
    C{ParserElement._literalStringClass} and then and-ed with this element;
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a C{None} result.
    """
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = ParserElement._literalStringClass( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
2019
2020
def __invert__( self ):
    """
    Implementation of the ~ operator - wraps this element in a C{L{NotAny}}.
    """
    negated = NotAny( self )
    return negated
2025
2026
def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted (left as C{None}), this is the same as calling C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
2044
2045
def suppress( self ):
    """
    Returns this C{ParserElement} wrapped in a C{Suppress}, so that its
    output is suppressed; useful to keep punctuation from cluttering up
    returned output.
    """
    wrapped = Suppress( self )
    return wrapped
2051
2052
def leaveWhitespace( self ):
    """
    Turns off whitespace skipping for this C{ParserElement} by clearing its
    C{skipWhitespace} flag, so leading whitespace is no longer skipped before
    matching the element's pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    Returns C{self}, so calls can be chained.
    """
    setattr( self, "skipWhitespace", False )
    return self
2060
2061
def setWhitespaceChars( self, chars ):
    """
    Overrides the default whitespace chars for this element: enables
    whitespace skipping, stores C{chars} as C{whiteChars}, and clears
    C{copyDefaultWhiteChars}.

    Returns C{self}, so calls can be chained.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
2069
2070
def parseWithTabs( self ):
    """
    Overrides the default behavior of expanding C{<TAB>}s to spaces before
    parsing the input string, by setting this element's C{keepTabs} flag.
    Must be called before C{parseString} when the input grammar contains
    elements that match C{<TAB>} characters.

    Returns C{self}, so calls can be chained.
    """
    setattr( self, "keepTabs", True )
    return self
2078
2079
def ignore( self, other ):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in C{Suppress}. A C{Suppress} expression is
    appended to C{ignoreExprs} only if it is not already present; any other
    expression is copied, wrapped in C{Suppress} and appended. Returns C{self}.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
2101
2102
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """
    Enable display of debugging messages while doing pattern matching,
    using the given actions. Any action passed as a false value (such as
    C{None}) is replaced by the corresponding module default debug action.

    Stores the three actions as the C{debugActions} tuple, sets C{debug}
    to C{True}, and returns C{self}.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
2111
2112
def setDebug( self, flag=True ):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable (installing the default debug actions via
    C{L{setDebugActions}}), False to disable. Returns C{self}.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using L{setDebugActions}. Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
2152
2153
def __str__( self ):
    """Return this element's C{name} attribute as its string form."""
    label = self.name
    return label
2155
2156
def __repr__( self ):
    """Return the C{_ustr} conversion of this element as its repr."""
    text = _ustr(self)
    return text
2158
2159
def streamline( self ):
    """
    Mark this element as streamlined and reset its C{strRepr} to C{None}.
    Returns C{self}.
    """
    self.strRepr = None
    self.streamlined = True
    return self
2163
2164
def checkRecursion( self, parseElementList ):
    """
    Recursion check hook; this implementation does nothing and returns C{None}.
    """
    return None
2166
2167
def validate( self, validateTrace=[] ):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    This implementation runs C{checkRecursion} with a fresh empty list;
    the C{validateTrace} argument is not used here.
    """
    seen = []
    self.checkRecursion( seen )
2172
2173
def parseFile( self, file_or_filename, parseAll=False ):
    """
    Execute the parse expression on the given file or filename.
    If the argument has no C{read} attribute it is treated as a filename:
    the file is opened in text mode, read in full, and closed before parsing.
    The contents are then passed to C{parseString} along with C{parseAll}.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # not a file-like object - treat it as a path
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise from this frame, which clears out pyparsing's internal stack trace
            raise exc
        raise
2192
2193
def __eq__(self,other):
    """
    Equality test. Against another C{ParserElement}, true if it is the same
    object or has equal instance attributes (C{vars}); against a string,
    true if this expression matches that string (see C{matches}); for any
    other operand, falls back to comparing the C{super} proxy with C{other}.
    """
    if isinstance(other, ParserElement):
        if self is other:
            return True
        return vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement,self)==other
2200
2201
def __ne__(self,other):
    """Inequality test, defined as the negation of C{self == other}."""
    same = (self == other)
    return not same
2203
2204
def __hash__(self):
    """Hash on object identity: the hash of C{id(self)}."""
    identity = id(self)
    return hash(identity)
2206
2207
def __req__(self,other):
    """Reflected-style equality helper; returns the result of C{self == other}."""
    outcome = (self == other)
    return outcome
2209
2210
def __rne__(self,other):
    """Reflected-style inequality helper; returns the negation of C{self == other}."""
    same = (self == other)
    return not same
2212
2213
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns C{True} if C{parseString} succeeds on the string, C{False} if it
    raises a C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2231
2232
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and results is a list of
    (test string, parse result or raised exception) pairs, one per test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        tests = [str.strip(raw) for raw in tests.rstrip().splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)

    success = True
    allResults = []
    comments = []      # comment lines collected for the next test
    for t in tests:
        is_comment = comment is not None and comment.matches(t, False)
        # a blank line directly after collected comments is kept with them
        if is_comment or (comments and not t):
            comments.append(t)
            continue
        if not t:
            continue

        out = ['\n'.join(comments), t]
        comments = []
        try:
            # a literal backslash-n in the test text stands for a real newline
            t = t.replace(r'\n','\n')
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            if isinstance(pe, ParseFatalException):
                fatal = "(FATAL)"
            else:
                fatal = ""
            # point a caret at the failure location
            if '\n' in t:
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2362
2363
2364
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
    Initializes the base class with C{savelist=False}.
    """
    def __init__( self ):
        ParserElement.__init__( self, savelist=False )
2370
2371
2372
class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # flags: may match without producing tokens, and is marked as not raising IndexError
        self.mayIndexError = False
        self.mayReturnEmpty = True
2381
2382
2383
class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        # unconditionally fail at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
2396
2397
2398
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string is turned into an Empty instance
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

# short alias, and the class used when strings are combined with ParserElements
_L = Literal
ParserElement._literalStringClass = Literal
2438
2439
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done in upper case, so both the keyword
            # and the identifier characters are upper-cased once here
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        # A keyword matches only when the text matches AND the characters just
        # before and just after it (if any) are not identifier characters.
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword that keeps this instance's own
        identifier characters (as an independent set).

        The copy used to be reset to C{Keyword.DEFAULT_KEYWORD_CHARS}, which
        silently discarded any custom C{identChars} given to the constructor.
        """
        c = super(Keyword,self).copy()
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2503
2504
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__( self, matchString ):
        # the base Literal holds the upper-cased text used for comparison
        upper_cased = matchString.upper()
        super(CaselessLiteral,self).__init__( upper_cased )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        end = loc+self.matchLen
        if instring[ loc:end ].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2526
2527
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__( self, matchString, identChars=None ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        # match the text case-insensitively, then require that the keyword ends
        # the string or is followed by a non-identifier character
        candidate = instring[ loc:loc+self.matchLen ]
        if candidate.upper() == self.caselessmatch:
            if (loc >= len(instring)-self.matchLen or
                    instring[loc+self.matchLen].upper() not in self.identChars):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
2544
2545
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch,self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Compare C{match_string} character by character with the input text
        starting at C{loc}. Returns C{(end location, ParseResults)} when no
        more than C{maxMismatches} characters differ; raises
        C{ParseException} at the starting location otherwise, or when fewer
        than C{len(match_string)} characters remain in the input.
        """
        start = loc
        instrlen = len(instring)
        maxloc = start + len(self.match_string)

        if maxloc <= instrlen:
            match_string = self.match_string
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # Every character was compared, so the match ends at maxloc.
                # The end used to be computed as match_stringloc + 1, an offset
                # relative to the match string, which is only right when the
                # match starts at position 0 of the input.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2604
2605
2606
class Word(Token):
2607
"""
2608
Token for matching words composed of allowed character sets.
2609
Defined with string containing all allowed initial characters,
2610
an optional string containing allowed body characters (if omitted,
2611
defaults to the initial character set), and an optional minimum,
2612
maximum, and/or exact length. The default value for C{min} is 1 (a
2613
minimum value < 1 is not valid); the default values for C{max} and C{exact}
2614
are 0, meaning no maximum or exact length restriction. An optional
2615
C{excludeChars} parameter can list characters that might be found in
2616
the input C{bodyChars} string; useful to define a word of all printables
2617
except for one or two characters, for instance.
2618
2619
L{srange} is useful for defining custom character set strings for defining
2620
C{Word} expressions, using range notation from regular expression character sets.
2621
2622
A common mistake is to use C{Word} to match a specific literal string, as in
2623
C{Word("Address")}. Remember that C{Word} uses the string argument to define
2624
I{sets} of matchable characters. This expression would match "Add", "AAA",
2625
"dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
2626
To match an exact literal string, use L{Literal} or L{Keyword}.
2627
2628
pyparsing includes helper strings for building Words:
2629
- L{alphas}
2630
- L{nums}
2631
- L{alphanums}
2632
- L{hexnums}
2633
- L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
2634
- L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
2635
- L{printables} (any non-whitespace character)
2636
2637
Example::
2638
# a word composed of digits
2639
integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
2640
2641
# a word with a leading capital, and zero or more lowercase
2642
capital_word = Word(alphas.upper(), alphas.lower())
2643
2644
# hostnames are alphanumeric, with leading alpha, and '-'
2645
hostname = Word(alphas, alphanums+'-')
2646
2647
# roman numeral (not a strict parser, accepts invalid mix of characters)
2648
roman = Word("IVXLCDM")
2649
2650
# any string of non-whitespace characters, except for ','
2651
csv_value = Word(printables, excludeChars=",")
2652
"""
2653
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """
    Set up the character sets, length limits and (when possible) the
    compiled regular expression used to match this C{Word}.

    Parameters:
     - initChars - characters allowed in the first position
     - bodyChars - characters allowed after the first position
       (default=C{None}, meaning the same characters as C{initChars})
     - min - minimum word length, must be >= 1 (default=1)
     - max - maximum word length, 0 means no maximum (default=0)
     - exact - exact word length; when > 0 it replaces both limits (default=0)
     - asKeyword - stored on the instance; on the regex path the pattern
       is wrapped in C{\\b} word boundaries (default=C{False})
     - excludeChars - characters removed from C{initChars} and
       C{bodyChars} before the sets are built (default=C{None})

    Raises C{ValueError} if C{min} is less than 1.
    """
    super(Word,self).__init__()

    if excludeChars:
        initChars = ''.join(ch for ch in initChars if ch not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(ch for ch in bodyChars if ch not in excludeChars)

    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    # an empty/missing body set falls back to the leading-character set
    self.bodyCharsOrig = bodyChars if bodyChars else initChars
    self.bodyChars = set(self.bodyCharsOrig)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    if exact > 0:
        # an exact length overrides whatever min/max were given
        self.minLen = self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

    # the name is derived from __str__, which needs the *Orig strings set above
    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # a regex is only attempted for default length limits and space-free sets
    if ' ' not in self.initCharsOrig + self.bodyCharsOrig and min == 1 and max == 0 and exact == 0:
        bodyClass = _escapeRegexRangeChars(self.bodyCharsOrig)
        if self.bodyCharsOrig == self.initCharsOrig:
            reString = "[%s]+" % bodyClass
        elif len(self.initCharsOrig) == 1:
            reString = "%s[%s]*" % (re.escape(self.initCharsOrig), bodyClass)
        else:
            reString = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig), bodyClass)
        if self.asKeyword:
            reString = r"\b" + reString + r"\b"
        self.reString = reString
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # best effort: parseImpl falls back to the character loop
            self.re = None
2706
2707
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match a word at C{loc} and return C{(end_location, matched_text)}.

    When a compiled regex is available it is used directly; otherwise the
    string is scanned character by character against the init/body sets.
    Raises C{ParseException} when no acceptable word starts at C{loc}.
    """
    if self.re:
        matched = self.re.match(instring, loc)
        if not matched:
            raise ParseException(instring, loc, self.errmsg, self)
        return matched.end(), matched.group()

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    body = self.bodyChars
    total = len(instring)
    limit = min(start + self.maxLen, total)
    loc += 1
    while loc < limit and instring[loc] in body:
        loc += 1

    # fewer characters than the minimum length
    tooShort = loc - start < self.minLen
    # an explicit maximum was given but the word keeps going past it
    overrunsMax = self.maxSpecified and loc < total and instring[loc] in body
    # keyword mode: a body character directly before or after the word
    notIsolated = self.asKeyword and (
        (start > 0 and instring[start-1] in body)
        or (loc < total and instring[loc] in body))

    if tooShort or overrunsMax or notIsolated:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2741
2742
def __str__( self ):
    """
    Return the element's name if the base class can supply one, otherwise a
    cached C{W:(...)} summary of the init/body character strings (each
    abbreviated to its first four characters).
    """
    try:
        return super(Word,self).__str__()
    except Exception:
        pass

    if self.strRepr is None:

        def abbreviate(chars):
            return chars[:4] + "..." if len(chars) > 4 else chars

        leading = abbreviate(self.initCharsOrig)
        if self.initCharsOrig == self.bodyCharsOrig:
            self.strRepr = "W:(%s)" % leading
        else:
            self.strRepr = "W:(%s,%s)" % ( leading, abbreviate(self.bodyCharsOrig) )

    return self.strRepr
2763
2764
2765
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    # type of a compiled pattern object, used to recognize pre-compiled input
    compiledREtype = type(re.compile("[A-Z]"))

    def __init__( self, pattern, flags=0):
        """
        Build the token from a regex string or an already compiled pattern.

        A string C{pattern} is passed with C{flags} to C{re.compile()} as-is
        (see the Python C{re} module for acceptable patterns and flags). A
        compiled pattern object is used directly; C{flags} is only recorded.

        An empty string triggers a C{SyntaxWarning}. An invalid pattern
        string triggers a C{SyntaxWarning} and the compile error is re-raised.
        Raises C{ValueError} for any other argument type.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Record the regex source text. str() of a compiled pattern is
            # "re.compile('...')" (or an object repr), not the pattern itself,
            # which would end up in reString, the name and the error message.
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the regex at C{loc}; return the end location and a
        C{ParseResults} holding the matched text, with every named group
        stored under its group name. Raises C{ParseException} on no match.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        named = result.groupdict()
        for key in named:
            ret[key] = named[key]
        return loc,ret

    def __str__( self ):
        """Return the element's name if available, else a cached C{Re:(...)} form."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2836
2837
2838
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=C{None})
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """
        Validate the delimiters and assemble the regular expression that
        recognizes the whole quoted string.

        Emits a C{SyntaxWarning} and raises C{SyntaxError} when either
        delimiter is empty after stripping whitespace; re-raises the regex
        compile error (after a C{SyntaxWarning}) if the pattern is invalid.
        """
        super(QuotedString,self).__init__()

        # whitespace around a delimiter is stripped before use
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # characters that may not appear unescaped inside the body:
        # the first end-quote character, the escape character, and
        # (unless multiline) line breaks
        escClass = ''
        if escChar is not None:
            escClass = _escapeRegexRangeChars(escChar) or ''
        endFirst = _escapeRegexRangeChars(self.endQuoteChar[0])
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            forbidden = endFirst + escClass
        else:
            self.flags = 0
            forbidden = endFirst + r'\n\r' + escClass

        pieces = [r'%s(?:[^%s]' % (re.escape(self.quoteChar), forbidden)]

        endLen = len(self.endQuoteChar)
        if endLen > 1:
            # allow proper prefixes of a multi-character end quote that are
            # not followed by the next end-quote character
            partials = [
                "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                             _escapeRegexRangeChars(self.endQuoteChar[i]))
                for i in range(endLen-1, 0, -1)
            ]
            pieces.append('|(?:' + ')|(?:'.join(partials) + ')')
        if escQuote:
            pieces.append(r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            pieces.append(r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        pieces.append(r')*%s' % re.escape(self.endQuoteChar))
        self.pattern = ''.join(pieces)

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a quoted string at C{loc}; return the end location and the
        text, unquoted and unescaped when C{unquoteResults} is set.
        Raises C{ParseException} if no quoted string starts at C{loc}.
        """
        # cheap first-character test before running the regex
        if instring[loc] != self.firstQuoteChar:
            raise ParseException(instring, loc, self.errmsg, self)
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:
            # drop the opening and closing delimiters
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # turn escaped whitespace literals into real whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    for wslit, wschar in ((r'\t', '\t'), (r'\n', '\n'),
                                          (r'\f', '\f'), (r'\r', '\r')):
                        ret = ret.replace(wslit, wschar)

                # remove the escape character in front of escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # collapse escaped quote sequences to the end quote
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the element's name if available, else a cached description of the delimiters."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2973
2974
2975
class CharsNotIn(Token):
    """
    Token for matching words composed of characters I{not} in a given set (will
    include whitespace in matched characters if not listed in the provided exclusion set - see example).
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """
        Store the excluded characters and the length limits.
        Raises C{ValueError} if C{min} is less than 1.
        """
        super(CharsNotIn,self).__init__()
        # leading whitespace is not skipped, so it can be part of the match
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        if exact > 0:
            # an exact length overrides min and max
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume characters from C{loc} until an excluded character, the
        maximum length, or the end of the string is reached. Returns the end
        location and the matched text; raises C{ParseException} if the first
        character is excluded or too few characters were matched.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min( start+self.maxLen, len(instring) )
        loc += 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the element's name if available, else a cached C{!W:(...)} summary."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            chars = self.notChars
            if len(chars) > 4:
                self.strRepr = "!W:(%s...)" % chars[:4]
            else:
                self.strRepr = "!W:(%s)" % chars

        return self.strRepr
3045
3046
class White(Token):
    """
    Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # display names used to build the element name from the matched characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """
        Store the whitespace characters to match and the length limits.
        The characters being matched are removed from this element's set of
        skippable whitespace so they are not consumed before matching.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars( skippable )
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length overrides min and max
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a run of the configured whitespace characters at C{loc}.
        Returns the end location and the matched text; raises
        C{ParseException} if the first character does not match or the run
        is shorter than the minimum length.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min( start + self.maxLen, len(instring) )
        loc += 1
        while loc < stop and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
3095
3096
3097
class _PositionToken(Token):
    """Common base for the position-matching tokens defined below it; named after its class."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.name = type(self).__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False
3103
3104
class GoToColumn(_PositionToken):
    """
    Token to advance to a specific column of input text; useful for tabular report scraping.
    """
    def __init__( self, colno ):
        """Remember the target column number C{colno}."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """
        Skip ignorable expressions and then whitespace until the target
        column (or a non-space character, or the end of input) is reached.
        """
        if col(loc,instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the location of the target column and the text between C{loc}
        and that location. Raises C{ParseException} if C{loc} is already
        past the target column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
3128
3129
3130
class LineStart(_PositionToken):
    """
    Matches if current position is at the beginning of a line within the parse string

    Example::

        test = '''\\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__( self ):
        super(LineStart,self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when C{loc} is in column 1; otherwise raise C{ParseException}."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3159
3160
class LineEnd(_PositionToken):
    """
    Matches if current position is at the end of a line within the parse string
    """
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline must not be skipped as whitespace, since it is what gets matched
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a newline at C{loc} (returning it as the token) or the end of
        the string (returning no tokens); otherwise raise C{ParseException}.
        """
        end = len(instring)
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == end:
            return loc+1, []
        raise ParseException(instring, loc, self.errmsg, self)
3179
3180
class StringStart(_PositionToken):
    """
    Matches if current position is at the beginning of the parse string
    """
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed at location 0, or at the location reached by pre-parsing
        from 0 (i.e. only whitespace and ignorables precede C{loc});
        otherwise raise C{ParseException}.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3194
3195
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Raise C{ParseException} while input remains; exactly at the end
        return C{loc+1}, and past the end return C{loc} unchanged (no tokens
        in either case).
        """
        end = len(instring)
        if loc == end:
            return loc+1, []
        if loc > end:
            return loc, []
        # loc < end: unparsed text remains
        raise ParseException(instring, loc, self.errmsg, self)
3212
3213
class WordStart(_PositionToken):
    """
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed at location 0, or when the previous character is not a word
        character and the current one is; otherwise raise C{ParseException}.
        """
        if loc != 0:
            precededByWord = instring[loc-1] in self.wordChars
            if precededByWord or instring[loc] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3232
3233
class WordEnd(_PositionToken):
    """
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        # whitespace before the position is not skipped
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed at or past the end of the string, or when the current
        character is not a word character and the previous one is;
        otherwise raise C{ParseException}.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            followedByWord = instring[loc] in self.wordChars
            if followedByWord or instring[loc-1] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3254
3255
3256
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
    """
    def __init__( self, exprs, savelist = False ):
        """
        Normalize C{exprs} into the list C{self.exprs}.

        A single string becomes one literal element; an iterable made up
        entirely of strings has each string wrapped as a literal; any other
        iterable is copied into a list; a non-iterable object is stored as a
        one-element list.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Iterable ):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(item, basestring) for item in exprs):
                exprs = [ ParserElement._literalStringClass(item) for item in exprs ]
            self.exprs = exprs
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression at index C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, reset the cached string form, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so expressions shared with other grammars are untouched
        copies = [ e.copy() for e in self.exprs ]
        self.exprs = copies
        for e in copies:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as ignorable on this expression and on every
        contained expression. A C{Suppress} that is already registered is
        not added again.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the element's name if available, else a cached C{ClassName:(exprs)} form."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """
        Streamline all contained expressions and flatten nested expressions
        of the same class, then refresh the error message. Returns C{self}.
        """
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        def canAbsorb(candidate):
            return ( isinstance( candidate, self.__class__ ) and
                     not candidate.parseAction and
                     candidate.resultsName is None and
                     not candidate.debug )

        if len(self.exprs) == 2:
            first = self.exprs[0]
            if canAbsorb(first):
                self.exprs = first.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError  |= first.mayIndexError

            # re-read the last element: the list may have just been rebuilt
            last = self.exprs[-1]
            if canAbsorb(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError  |= last.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base class implementation and return its result."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then check this expression for recursion."""
        trace = validateTrace[:] + [self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are copies as well."""
        duplicate = super(ParseExpression,self).copy()
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate
3367
3368
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element (named C{'-'}); once seen, later failures become C{ParseSyntaxException}s."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """
        Collect the expressions; whitespace handling is inherited from the
        first expression, and the sequence may return empty only if every
        expression may.
        """
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        lead = self.exprs[0]
        self.setWhitespaceChars( lead.whiteChars )
        self.skipWhitespace = lead.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse each expression in order, accumulating their tokens.
        After an C{_ErrorStop} marker, any parse failure or C{IndexError}
        is raised as a C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        pastErrorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                pastErrorStop = True
                continue

            if not pastErrorStop:
                loc, exprtokens = e._parse( instring, loc, doActions )
            else:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} (a string is wrapped as a literal) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """
        Run the recursion check on the leading expressions, stopping after
        the first one that cannot return empty.
        """
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if any, else a cached C{{a b c}} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
3442
3443
3444
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """Collect the alternatives; may return empty if any alternative may (or if there are none)."""
        super(Or,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Try every alternative without actions, then re-parse the successful
        ones from longest to shortest match and return the first that
        succeeds. If none does, raise the failure that got furthest into
        the input, using this expression's error message.
        """
        furthestLoc = -1
        furthestExc = None
        candidates = []
        for e in self.exprs:
            try:
                endLoc = e.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),e.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, e))

        if candidates:
            # stable sort: equally long matches keep their declaration order
            candidates.sort(key=lambda match: match[0], reverse=True)
            for _, e in candidates:
                try:
                    return e._parse( instring, loc, doActions )
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestExc = err
                        furthestLoc = err.loc

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ixor__(self, other ):
        """Append C{other} (a string is wrapped as a literal) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if any, else a cached C{{a ^ b ^ c}} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
3521
3522
3523
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """Collect the alternatives; may return empty if any alternative may (or if there are none)."""
        super(MatchFirst,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the result of the first alternative that parses. If none
        does, raise the failure that got furthest into the input, using
        this expression's error message.
        """
        furthestLoc = -1
        furthestExc = None
        for e in self.exprs:
            try:
                return e._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),e.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ior__(self, other ):
        """Append C{other} (a string is wrapped as a literal) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if any, else a cached C{{a | b | c}} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
3589
3590
3591
class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the required/optional groupings are computed lazily on the first parse
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the sub-expressions in whatever order they occur in the input.

        A first pass uses C{tryParse} to discover the order in which the
        sub-expressions match; a second pass re-parses them in that order
        with C{_parse} to collect the results. Raises C{ParseException} if
        any required sub-expression was never matched.
        """
        if self.initExprGroups:
            optional_wrappers = [e for e in self.exprs if isinstance(e, Optional)]
            # maps id() of an Optional's inner expression back to the Optional itself
            self.opt1map = dict((id(w.expr), w) for w in optional_wrappers)
            unwrapped_optionals = [w.expr for w in optional_wrappers]
            implicit_optionals = [
                e for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, Optional)
            ]
            self.optionals = unwrapped_optionals + implicit_optionals
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [
                e for e in self.exprs
                if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        probe_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        match_order = []

        # keep sweeping over the candidates until a full sweep matches nothing
        while True:
            candidates = (
                pending_required + pending_optional
                + self.multioptionals + self.multirequired
            )
            failures = 0
            for candidate in candidates:
                try:
                    probe_loc = candidate.tryParse(instring, probe_loc)
                except ParseException:
                    failures += 1
                else:
                    match_order.append(self.opt1map.get(id(candidate), candidate))
                    if candidate in pending_required:
                        pending_required.remove(candidate)
                    elif candidate in pending_optional:
                        pending_optional.remove(candidate)
            if failures == len(candidates):
                break

        if pending_required:
            missing = ", ".join(_ustr(e) for e in pending_required)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        match_order += [
            e for e in self.exprs
            if isinstance(e, Optional) and e.expr in pending_optional
        ]

        collected = []
        for matched in match_order:
            loc, results = matched._parse(instring, loc, doActions)
            collected.append(results)

        return loc, sum(collected, ParseResults([]))

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            members = " & ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + members + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        visited = parseElementList[:] + [self]
        for member in self.exprs:
            member.checkRecursion(visited)
3713
3714
3715
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored in C{self.expr}, which may be
    C{None}; every method that touches C{self.expr} guards against that.
    """

    def __init__(self, expr, savelist=False):
        """
        Parameters:
         - expr - the wrapped expression; a plain string is converted using
              C{ParserElement._literalStringClass}; may be C{None}
         - savelist - passed through to C{ParserElement.__init__}
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the parsing-related settings of the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression; raise C{ParseException} if there is none."""
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """
        Disable whitespace skipping on this element and on a private copy of
        the wrapped expression. Returns C{self}.
        """
        self.skipWhitespace = False
        # Guard before copying: the original dereferenced self.expr ahead of
        # its own None check, raising AttributeError when no expression is set.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """
        Register C{other} as an ignorable expression here and on the wrapped
        expression. A C{Suppress} already present in C{ignoreExprs} is not
        added again. Returns C{self}.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and then the wrapped expression. Returns C{self}."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """
        Raise C{RecursiveGrammarException} if this element already appears in
        C{parseElementList}; otherwise continue the check in the wrapped expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None):
        """
        Validate the wrapped expression, then run the recursion check.

        C{validateTrace} is the list of elements visited so far; it is copied,
        never modified. C{None} (the default) means an empty trace.
        """
        trace = [] if validateTrace is None else validateTrace
        tmp = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """
        Prefer the base-class string form; if that raises, fall back to a
        cached C{"ClassName:(expr)"} rendering.
        """
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
3790
3791
3792
class FollowedBy(ParseElementEnhance):
    """
    Lookahead matching of the given parse expression.  C{FollowedBy}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # tryParse raises if the lookahead fails; on success the location is left unchanged
        self.expr.tryParse(instring, loc)
        return loc, []
3816
3817
3818
class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression does I{not} match at the current
    position.  Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator.

    Example::
        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
    """

    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        if self.expr.canParseNext(instring, loc):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
3849
3850
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation for repetition expressions: matches the wrapped
    expression once, then keeps matching it until it fails or the optional
    C{stopOn} sentinel is seen.
    """

    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # negative lookahead on the sentinel, or None when no sentinel was given
        self.not_ender = ~sentinel if sentinel is not None else None

    def parseImpl(self, instring, loc, doActions=True):
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_ender = self.not_ender.tryParse if self.not_ender is not None else None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions, callPreParse=False)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                start = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_expr(instring, start, doActions)
                if more_tokens or more_tokens.haskeys():
                    tokens += more_tokens
        except (ParseException, IndexError):
            # the first failed repetition (or a sentinel hit) ends the loop
            pass

        return loc, tokens
3887
3888
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr
3922
3923
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """

    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # zero matches is a success: return the unchanged location and no tokens
        try:
            return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            return loc, []

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr
3953
3954
class _NullToken(object):
3955
def __bool__(self):
3956
return False
3957
__nonzero__ = __bool__
3958
def __str__(self):
3959
return ""
3960
3961
_optionalNotMatched = _NullToken()
3962
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match zero times or one time
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                # no default was supplied: an unmatched Optional yields nothing
                tokens = []
            elif self.expr.resultsName:
                # keep the default reachable under the inner expression's results name
                tokens = ParseResults([fallback])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
4025
4026
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; scanning stops as soon as one is found
          before the target expression

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """

    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Scan forward one character at a time from C{loc} until the target
        expression matches, and return the text skipped on the way (plus the
        target's own tokens when C{include} was requested).

        Raises C{ParseException} if the end of the input is reached without
        the target matching.
        """
        start = loc
        input_len = len(instring)
        parse_target = self.expr._parse
        fail_on_matches = self.failOn.canParseNext if self.failOn is not None else None
        skip_ignored = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        scan_loc = loc
        while scan_loc <= input_len:
            # NOTE(review): a failOn match leaves the loop through break, so the
            # while/else failure below is bypassed and the text skipped so far is
            # returned - confirm this is the intended handling of failOn.
            if fail_on_matches is not None and fail_on_matches(instring, scan_loc):
                break

            if skip_ignored is not None:
                # advance past ignore expressions
                while True:
                    try:
                        scan_loc = skip_ignored(instring, scan_loc)
                    except ParseBaseException:
                        break

            try:
                parse_target(instring, scan_loc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                scan_loc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = scan_loc
        skipresult = ParseResults(instring[start:loc])

        if self.includeMatch:
            loc, target_tokens = parse_target(instring, loc, doActions, callPreParse=False)
            skipresult += target_tokens

        return loc, skipresult
4140
4141
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """

    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """
        Assign the real expression to this placeholder and copy its
        parsing-related settings onto it. A plain string is first wrapped
        with C{ParserElement._literalStringClass}. Returns C{self}.
        """
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """Implement C{fwd <<= expr} as C{fwd << expr}."""
        return self << other

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only. Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """
        Streamline the contained expression once; the C{streamlined} flag is
        set before descending. Returns C{self}.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        """
        Validate the contained expression unless this element already appears
        in C{validateTrace}, then run the recursion check.

        C{validateTrace} is the list of elements visited so far; it is copied,
        never modified. C{None} (the default) means an empty trace.
        """
        trace = [] if validateTrace is None else validateTrace
        if self not in trace:
            tmp = trace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """
        Return the assigned name if there is one, otherwise C{"<ClassName>: ..."}.

        The contained expression is deliberately not rendered. The code that
        did so sat unreachable after the return (marked as stubbed out for
        memory and performance reasons) and has been removed.
        """
        if hasattr(self, "name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Copy this element. When no expression has been assigned yet, return a
        new C{Forward} that refers to this one instead.
        """
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
4221
4222
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always C{"..."}."""

    def __str__(self):
        return "..."
4225
4226
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """

    def __init__(self, expr, savelist=False):
        # savelist is accepted for signature compatibility but is not passed on
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
4233
4234
class Combine(TokenConverter):
    """
    Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        # an adjacent Combine registers the ignorable on itself only, via the base ParserElement
        if self.adjacent:
            ParserElement.ignore(self, other)
        else:
            super(Combine, self).ignore(other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # start from a copy of the tokens, emptied of its list items
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
4277
4278
class Group(TokenConverter):
    """
    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # wrap the matched tokens in one enclosing list
        return [tokenlist]
4298
4299
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """

    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """
        Register each non-empty entry of C{tokenlist} under a key taken from
        its first token; the stored value depends on how many tokens follow
        the key.
        """
        for position, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # integer keys are converted to their stripped string form
                key = _ustr(entry[0]).strip()

            if entry_len == 1:
                # key with nothing after it
                value = ""
            elif entry_len == 2 and not isinstance(entry[1], ParseResults):
                # key followed by one plain value
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
4362
4363
4364
class Suppress(TokenConverter):
    """
    Converter for ignoring the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """

    def postParse(self, instring, loc, tokenlist):
        # discard whatever was matched
        return []

    def suppress(self):
        # already suppressing; return this same object
        return self
4388
4389
4390
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.
    """

    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        # once a call has completed, every later call raises until reset()
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # only marked as called after the wrapped action returned without raising
        self.called = True
        return results

    def reset(self):
        self.called = False
4405
4406
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        label = f.__name__
        if len(paArgs) > 3:
            # with a leading extra argument, prefix the label with that argument's class name
            label = paArgs[0].__class__.__name__ + '.' + label
        trace = sys.stderr.write
        trace(">>entering %s(line: '%s', %d, %r)\n" % (label, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            trace("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        trace("<<leaving %s (ret: %r)\n" % (label, ret))
        return ret

    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
4446
4447
#
4448
# global helpers
4449
#
4450
def delimitedList(expr, delim=",", combine=False):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    list_name = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..."
    if combine:
        grammar = Combine(expr + ZeroOrMore(delim + expr))
    else:
        grammar = expr + ZeroOrMore(Suppress(delim) + expr)
    return grammar.setName(list_name)
4468
4469
def countedArray(expr, intExpr=None):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # Once the count is parsed, define the array body as exactly n copies
        # of expr (or an empty group for a count of 0). A conditional
        # expression replaces the old "n and A or B" idiom, which only worked
        # while A happened to be truthy.
        n = t[0]
        arrayExpr << (Group(And([expr] * n)) if n else Group(empty))
        # suppress the count token itself
        return []

    if intExpr is None:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    else:
        # copy so the caller's expression does not pick up the parse action below
        intExpr = intExpr.copy()
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return (intExpr + arrayExpr).setName('(len) ' + _ustr(expr) + '...')
4499
4500
def _flatten(L):
    """Return a new flat list with the items of C{L}, expanding nested C{list}s recursively and in order."""
    flat = []
    for item in L:
        # only real lists are expanded; tuples, strings, etc. are kept as single items
        flat += _flatten(item) if isinstance(item, list) else [item]
    return flat
4508
4509
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    repeater = Forward()

    def copyTokenToRepeater(s, l, t):
        # each time expr matches, redefine the repeater from the matched tokens
        # (plain '<<' on purpose: '<<=' would rebind repeater as a local name)
        if not t:
            repeater << Empty()
        elif len(t) == 1:
            repeater << t[0]
        else:
            # several (possibly nested) tokens: require each one, in order, as a literal
            flatTokens = _flatten(t.asList())
            repeater << And(Literal(tok) for tok in flatTokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    repeater.setName('(prev) ' + _ustr(expr))
    return repeater
4536
4537
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    repeater = Forward()
    # the repeater parses with a copy of expr; only its parse action is swapped later
    repeater <<= expr.copy()

    def copyTokenToRepeater(s, l, t):
        # remember what expr matched, and make the repeater reject anything else
        expectedTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            actualTokens = _flatten(t.asList())
            if actualTokens != expectedTokens:
                raise ParseException("",0,"")

        repeater.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    repeater.setName('(prev) ' + _ustr(expr))
    return repeater
4564
4565
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regex C{[...]} set, returning the result via C{_ustr}."""
    # prefix each of \ ^ - ] with _bslash; the backslash comes first in the
    # sequence so that the prefixes added for the other characters are not processed again
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    # literal newline/tab characters become their two-character escape sequences
    escaped = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(escaped)
4572
4573
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    if caseless:
        def isequal(a, b):
            return a.upper() == b.upper()
        def masks(a, b):
            # True if a is a leading substring of b, so a would hide b when tested first
            return b.upper().startswith(a.upper())
        parseElementClass = CaselessLiteral
    else:
        def isequal(a, b):
            return a == b
        def masks(a, b):
            return b.startswith(a)
        parseElementClass = Literal

    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        symbols = []
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Drop duplicates and move any symbol ahead of an earlier symbol that is a
    # prefix of it.  The list is edited in place; after each edit the scan
    # restarts at the same position, and the position only advances once a full
    # pass over the remaining symbols made no change.
    pos = 0
    while pos < len(symbols)-1:
        current = symbols[pos]
        for offset, candidate in enumerate(symbols[pos+1:]):
            if isequal(candidate, current):
                del symbols[pos+offset+1]
                break
            if masks(current, candidate):
                del symbols[pos+offset+1]
                symbols.insert(pos, candidate)
                break
        else:
            pos += 1

    if useRegex and not caseless:
        try:
            if len(symbols) == len("".join(symbols)):
                # total length equals the symbol count: build a character set
                pattern = "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
            else:
                pattern = "|".join(re.escape(sym) for sym in symbols)
            return Regex( pattern ).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4645
4646
def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
    in the proper order.  The key pattern can include delimiting markers or punctuation,
    as long as they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the C{Dict} results can include named token
    fields.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # each key/value pair is grouped so that Dict can treat it as one entry
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
4680
4681
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers whose parse action yields the current parse location
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s, l, t):
            # replace all tokens with the slice of the input between the two markers
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            # remove the marker entries, then overwrite the token list in place so
            # that the remaining results names survive
            begin = t.pop('_original_start')
            end = t.pop('_original_end')
            t[:] = [s[begin:end]]

    wrapped.setParseAction(extractText)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
4717
4718
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.
    """
    converter = TokenConverter(expr)
    # the parse action replaces the parsed tokens with their first element
    converter.setParseAction(lambda t:t[0])
    return converter
4724
4725
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose parse action yields the current parse location
    locator = Empty().setParseAction(lambda s,l,t: l)
    startLoc = locator("locn_start")
    # the end marker must not skip whitespace, so it reports where the match stopped
    endLoc = locator.copy().leaveWhitespace()("locn_end")
    return Group(startLoc + expr("value") + endLoc)
4747
4748
4749
# convenience constants for positional expressions
# (each is a shared module-level instance, named after the constant itself)
empty = Empty()
empty.setName("empty")

lineStart = LineStart()
lineStart.setName("lineStart")

lineEnd = LineEnd()
lineEnd.setName("lineEnd")

stringStart = StringStart()
stringStart.setName("stringStart")

stringEnd = StringEnd()
stringEnd.setName("stringEnd")
4755
4756
# Internal grammar used by srange() to parse a regex-style '[...]' character set.

def _hexEscapeToChar(s, l, t):
    # Parse action for _escapedHexChar: convert the matched escape to its character.
    # The hex digits are everything after the x/X marker.  (str.lstrip(r'\0x')
    # must not be used for this: it strips a *set* of characters, so it leaves an
    # upper-case 'X' in place and also eats leading '0' digits, turning an escape
    # such as \x00 into an empty string that int() rejects.)
    hexDigits = t[0].lower().partition('x')[2]
    return unichr(int(hexDigits, 16))

# a backslash followed by one punctuation character stands for that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## hex escape; the \0x## form is also accepted
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(_hexEscapeToChar)
# \0## octal escape
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
# any one of the escapes above, or a single character other than backslash and ']'
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# 'a-z' style range, grouped so that srange() can tell it apart from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
4762
4763
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)

    If the input cannot be parsed or expanded, an empty string is returned.
    """
    def expand(part):
        # a ParseResults part is a (first, last) character pair to be expanded
        # inclusively; any other part is already the character itself
        if isinstance(part, ParseResults):
            first = ord(part[0])
            last = ord(part[1])
            return ''.join(unichr(code) for code in range(first, last+1))
        return part

    try:
        parts = _reBracketExpr.parseString(s).body
        return "".join(expand(part) for part in parts)
    except Exception:
        return ""
4786
4787
def matchOnlyAtCol(n):
    """
    Helper method for defining parse actions that require matching at a specific
    column in the input text.

    Returns a parse action that raises C{ParseException} unless the match
    location is at column C{n} (as computed by C{col}).
    """
    def verifyCol(strg, locn, toks):
        if col(locn, strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
4796
4797
def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    def replaceAction(s, l, t):
        # the matched tokens are ignored; a new one-item list is built on every call
        return [replStr]
    return replaceAction
4810
4811
def removeQuotes(s,l,t):
    """
    Helper parse action for removing quotation marks from parsed quoted strings.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    # drop the first and last character of the first token, whatever they are
    return quoted[1:-1]
4824
4825
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
    args are passed, they are forwarded to the given function as additional arguments after
    the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
    parsed data to an integer using base 16.

    Example (compare the last to example in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        return [func(token, *args) for token in t]

    # name the parse action after the mapped function: its __name__ if it has
    # one, otherwise the name of its class, and str(func) if even that fails
    try:
        classFallback = func.__class__.__name__
        func_name = getattr(func, '__name__', classFallback)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
4868
4869
# Both actions go through _ustr first, so each token is converted by _ustr
# before its case is changed.
upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
4874
4875
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr,basestring):
        resname = tagStr
        # XML tag names are case-sensitive, HTML tag names are not
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    # optional "/" before the closing ">"; stored under "empty" as True/False
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')

    if xml:
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagAttrs = Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue )))
    else:
        # HTML: values may be quoted either way or unquoted (any printable but ">"),
        # the value is optional, and attribute names are lower-cased
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        tagAttrs = Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) )))

    openTag = Suppress("<") + tagStr("tag") + tagAttrs + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name in title case, with ':' and blanks removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start" + nameSuffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + nameSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
4903
4904
def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    openTag, closeTag = _makeTags( tagStr, False )
    return openTag, closeTag
4922
4923
def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
    tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    openTag, closeTag = _makeTags( tagStr, True )
    return openTag, closeTag
4931
4932
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional name/value pairs take precedence; keyword arguments are only
    # consulted when no positional pairs were given
    pairs = args if args else attrDict.items()
    # snapshot the pairs now (this also checks that each entry unpacks into two parts)
    required = [(name, value) for name, value in pairs]

    def pa(s,l,tokens):
        for attrName, attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            # ANY_VALUE only requires the attribute to be present
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa
# sentinel meaning "the attribute must exist, with whatever value"
withAttribute.ANY_VALUE = object()
4996
4997
def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} when matching on a div class - made
    difficult because C{class} is a reserved word in Python.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    # passed through ** because "class" (and any "ns:class") cannot be written as a keyword
    return withAttribute(**{classattr : classname})
5031
5032
# Associativity markers for infixNotation: two distinct sentinel objects,
# compared against the rightLeftAssoc member of each operator definition.
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
5035
5036
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic element for the nested
       expression (the operand)
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple (or list) of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if numTerms is not 1, 2 or 3, if numTerms is 3 and opExpr
    is not a pair of expressions, or if rightLeftAssoc is neither C{opAssoc.LEFT}
    nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    # the innermost level: an operand, or a complete expression in parentheses
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the parse action member may be omitted;
        # tuple() lets a level also be given as a list
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate the operator pair *before* it is used to format the term
            # name, so that a bad value gives this error rather than a TypeError
            # from the '%' operator, and so that a list pair is accepted too
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        # each matchExpr starts with a FollowedBy lookahead for the operator
        # pattern, followed by the Group that actually parses it
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: adjacent terms form the operation
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # a term at this level is either an operation at this level or a bare
        # term of the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
5150
5151
# Quoted-string expressions.  Each Regex matches the opening quote and the body
# (no raw newlines or bare backslashes; a doubled quote or a backslash escape
# is allowed), and the closing quote is a separate literal; Combine joins the
# two parts into a single token.
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
)
dblQuotedString.setName("string enclosed in double quotes")

sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
)
sglQuotedString.setName("string enclosed in single quotes")

# either quoting style; '+' binds tighter than '|', so each alternative is
# a complete opening-quote/body/closing-quote sequence
quotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
)
quotedString.setName("quotedString using single or double quotes")

unicodeString = Combine(_L('u') + quotedString.copy())
unicodeString.setName("unicode string literal")
5156
5157
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t:t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        if len(opener) == 1 and len(closer)==1:
            # single-character delimiters can simply be excluded from the content characters
            if ignoreExpr is not None:
                content = (Combine(OneOrMore(~ignoreExpr +
                                CharsNotIn(opener+closer+whiteChars,exact=1))
                            ).setParseAction(stripToken))
            else:
                content = (empty.copy()+CharsNotIn(opener+closer+whiteChars
                            ).setParseAction(stripToken))
        else:
            # multi-character delimiters: take one non-white character at a time,
            # as long as no delimiter (or ignorable expression) starts there
            if ignoreExpr is not None:
                content = (Combine(OneOrMore(~ignoreExpr +
                                ~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars,exact=1))
                            ).setParseAction(stripToken))
            else:
                content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars,exact=1))
                            ).setParseAction(stripToken))

    ret = Forward()
    # a nested list is: opener, any mix of ignorable expressions, nested lists
    # and content items, then closer - with the delimiters themselves suppressed
    if ignoreExpr is None:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    else:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5246
5247
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Build an expression that matches a block of statements delimited by
    indentation, in the manner of block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining the syntax of the statement
            that is repeated within the indented block
     - indentStack - list created by the caller and used as the stack of
            active indentation columns (multiple statementWithIndentedBlock
            expressions within a single grammar should share a common
            indentStack)
     - indent - boolean indicating whether the block must be indented beyond
            the current level; set to False for a block of left-most
            statements (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s, l, t):
        # Nothing to compare once the parse location is at/after end of input.
        if l >= len(s):
            return
        column = col(l, s)
        expected = indentStack[-1]
        if column == expected:
            return
        # Deeper than the current level is fatal; shallower is an ordinary failure.
        if column > expected:
            raise ParseFatalException(s, l, "illegal nesting")
        raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        column = col(l, s)
        if column <= indentStack[-1]:
            raise ParseException(s, l, "not a subentry")
        # Entering a deeper level: remember its column on the shared stack.
        indentStack.append(column)

    def checkUnindent(s, l, t):
        if l >= len(s):
            return
        column = col(l, s)
        is_unindent = (indentStack
                       and column < indentStack[-1]
                       and column <= indentStack[-2])
        if not is_unindent:
            raise ParseException(s, l, "not an unindent")
        # Leaving the block: drop the column pushed by checkSubIndent.
        indentStack.pop()

    # One or more line ends; only tab and space are skipped as whitespace here.
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # Every statement must sit at the current peer column.
    statements = OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
    if indent:
        smExpr = Group(Optional(NL) + INDENT + statements + UNDENT)
    else:
        smExpr = Group(Optional(NL) + statements)

    # Have the statement expression ignore _bslash followed by a line end.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
5360
5361
# Character sets described by \0x.. code-point ranges and expanded by srange().
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Opening/closing tag expressions for any tag name built from alphas,
# alphanums, '_' and ':'.
anyOpenTag, anyCloseTag = makeHTMLTags(
    Word(alphas, alphanums + "_:").setName('any tag')
)

# Entity name -> replacement character, paired positionally.
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), '><& "\''))

# Matches '&name;' for any name in _htmlEntityMap, exposing it as 'entity'.
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap) + ");"
).setName("common HTML entity")
5367
def replaceHTMLEntity(t):
    """
    Helper parser action to replace common HTML entities with their special characters.

    Looks up C{t.entity} in C{_htmlEntityMap} and returns the mapped character,
    or C{None} when the entity name is not in the map.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
5370
5371
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

# '+' binds tighter than '|': either a /* ... */ comment or a // comment.
cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/') | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# One comma-free item: printable words, each optionally followed by a run of
# spaces/tabs that is not itself followed by a comma or a line end.
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',')
        + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    )
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional(quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
5397
5398
# some other useful expressions - using lower-case class name since we are really using this as a namespace
5399
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = (
        Word(hexnums)
        .setName("hex integer")
        .setParseAction(tokenMap(int, 16))
    )
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = (
        Regex(r'[+-]?\d+')
        .setName("signed integer")
        .setParseAction(convertToInteger)
    )
    """expression that parses an integer with optional leading sign, returns an int"""

    # signed_integer() makes a copy, so each operand gets its own float action.
    fraction = (
        signed_integer().setParseAction(convertToFloat)
        + '/'
        + signed_integer().setParseAction(convertToFloat)
    ).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (
        fraction
        | (signed_integer + Optional(Optional('-').suppress() + fraction))
    ).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    mixed_integer.addParseAction(sum)

    real = (
        Regex(r'[+-]?\d+\.\d*')
        .setName("real number")
        .setParseAction(convertToFloat)
    )
    """expression that parses a floating point number and returns a float"""

    sci_real = (
        Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)')
        .setName("real number with scientific notation")
        .setParseAction(convertToFloat)
    )
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = (
        Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?')
        .setName("fnumber")
        .setParseAction(convertToFloat)
    )
    """any int or real number, returned as float"""

    identifier = Word(alphas + '_', alphanums + '_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}'
    ).setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (
        _ipv6_part + (':' + _ipv6_part)*7
    ).setName("full IPv6 address")
    _short_ipv6_address = (
        Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
        + "::"
        + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
    ).setName("short IPv6 address")
    # The short form is accepted only with fewer than 8 hex groups in total.
    _short_ipv6_address.addCondition(
        lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
    )
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")
    ).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    mac_address = Regex(
        r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}'
    ).setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        A C{ValueError} raised by C{strptime} is re-raised as a C{ParseException}.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s, l, t):
            try:
                parsed = datetime.strptime(t[0], fmt)
            except ValueError as ve:
                raise ParseException(s, l, str(ve))
            return parsed.date()
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        A C{ValueError} raised by C{strptime} is re-raised as a C{ParseException}.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s, l, t):
            try:
                parsed = datetime.strptime(t[0], fmt)
            except ValueError as ve:
                raise ParseException(s, l, str(ve))
            return parsed
        return cvt_fn

    iso8601_date = Regex(
        r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?'
    ).setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(
        r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?'
    ).setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(
        r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}'
    ).setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # Matches any opening or closing tag and suppresses it from the output.
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()

    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        html_text = tokens[0]
        return pyparsing_common._html_stripper.transformString(html_text)

    _commasepitem = Combine(
        OneOrMore(
            ~Literal(",") + ~LineEnd()
            + Word(printables, excludeChars=',')
            + Optional(White(" \t"))
        )
    ).streamline().setName("commaItem")
    comma_separated_list = delimitedList(
        Optional(quotedString.copy() | _commasepitem, default="")
    ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
5671
5672
5673
if __name__ == "__main__":
    # Self-demo: build a tiny SELECT grammar and run the built-in test
    # harness on it and on a few pyparsing_common expressions.

    select_kw = CaselessLiteral("select")
    from_kw = CaselessLiteral("from")

    name_part = Word(alphas, alphanums + "_$")

    column_name = delimitedList(name_part, ".", combine=True)
    column_name.setParseAction(upcaseTokens)
    column_name_list = Group(delimitedList(column_name)).setName("columns")
    column_spec = ('*' | column_name_list)

    table_name = delimitedList(name_part, ".", combine=True)
    table_name.setParseAction(upcaseTokens)
    table_name_list = Group(delimitedList(table_name)).setName("tables")

    simple_sql = (
        select_kw("command")
        + column_spec("columns")
        + from_kw
        + table_name_list("tables")
    )

    # demo runTests method, including embedded comments in test string
    simple_sql.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    # Replaces the parse action on the shared pyparsing_common.uuid expression.
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
5743
5744