Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/third_party/ply/example/GardenSnake/GardenSnake.py
7087 views
1
# GardenSnake - a parser generator demonstration program
2
#
3
# This implements a modified version of a subset of Python:
4
# - only 'def', 'return' and 'if' statements
5
# - 'if' only has 'then' clause (no elif nor else)
6
# - single-quoted strings only, content in raw format
7
# - numbers are decimal.Decimal instances (not integers or floats)
8
# - no print statement; use the built-in 'print' function
9
# - only < > == + - / * implemented (and unary + -)
10
# - assignment and tuple assignment work
11
# - no generators of any sort
12
# - no ... well, not quite a lot
13
14
# Why? I'm thinking about a new indentation-based configuration
15
# language for a project and wanted to figure out how to do it. Once
16
# I got that working I needed a way to test it out. My original AST
17
# was dumb so I decided to target Python's AST and compile it into
18
# Python code. Plus, it's pretty cool that it only took a day or so
19
# from sitting down with Ply to having working code.
20
21
# This uses David Beazley's Ply from http://www.dabeaz.com/ply/
22
23
# This work is hereby released into the Public Domain. To view a copy of
24
# the public domain dedication, visit
25
# http://creativecommons.org/licenses/publicdomain/ or send a letter to
26
# Creative Commons, 543 Howard Street, 5th Floor, San Francisco,
27
# California, 94105, USA.
28
#
29
# Portions of this work are derived from Python's Grammar definition
30
# and may be covered under the Python copyright and license
31
#
32
# Andrew Dalke / Dalke Scientific Software, LLC
33
# 30 August 2006 / Cape Town, South Africa
34
35
# Changelog:
36
# 30 August - added link to CC license; removed the "swapcase" encoding
37
38
# Modifications for inclusion in PLY distribution
39
import sys
40
sys.path.insert(0,"../..")
41
from ply import *
42
43
##### Lexer ######
44
#import lex
45
import decimal
46
47
# Complete token inventory for the GardenSnake lexer.  INDENT, DEDENT
# and ENDMARKER have no t_ rules below: they are synthesized by the
# post-processing filters (indentation_filter / filter) further down.
tokens = (
    'DEF',
    'IF',
    'NAME',
    'NUMBER',  # Python decimals
    'STRING',  # single quoted strings only; syntax of raw strings
    'LPAR',
    'RPAR',
    'COLON',
    'EQ',
    'ASSIGN',
    'LT',
    'GT',
    'PLUS',
    'MINUS',
    'MULT',
    'DIV',
    'RETURN',
    'WS',
    'NEWLINE',
    'COMMA',
    'SEMICOLON',
    'INDENT',
    'DEDENT',
    'ENDMARKER',
)
73
74
#t_NUMBER = r'\d+'
75
# taken from decimal.py but without the leading sign
76
def t_NUMBER(t):
    r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
    # NUMBER tokens carry exact decimal.Decimal values rather than
    # int/float, per the language description in the file header.
    text = t.value
    t.value = decimal.Decimal(text)
    return t
80
81
def t_STRING(t):
    r"'([^\\']+|\\'|\\\\)*'" # I think this is right ...
    # Strip the surrounding single quotes and expand backslash escapes.
    # NOTE: the "string-escape" codec exists only in Python 2; this whole
    # file is Python 2 (print statements, exec-in at the bottom).
    t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun
    return t
85
86
t_COLON = r':'
87
t_EQ = r'=='
88
t_ASSIGN = r'='
89
t_LT = r'<'
90
t_GT = r'>'
91
t_PLUS = r'\+'
92
t_MINUS = r'-'
93
t_MULT = r'\*'
94
t_DIV = r'/'
95
t_COMMA = r','
96
t_SEMICOLON = r';'
97
98
# Ply nicely documented how to do this.
99
100
# Keyword text -> token type.  t_NAME consults this map so keywords and
# identifiers can share a single lexer rule (the approach documented in
# the PLY manual).
RESERVED = {
    "def": "DEF",
    "if": "IF",
    "return": "RETURN",
}
105
106
def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    # Identifiers and keywords share one pattern; reclassify the token
    # when the matched word is reserved.
    word = t.value
    if word in RESERVED:
        t.type = RESERVED[word]
    else:
        t.type = "NAME"
    return t
110
111
# Putting this before t_WS lets it consume lines with only comments in
112
# them so the latter code never sees the WS part. Not consuming the
113
# newline. Needed for "if 1: #comment"
114
def t_comment(t):
    r"[ ]*\043[^\n]*" # \043 is '#'
    # Returning nothing discards the token; the trailing newline is
    # deliberately left for t_newline to consume.
    pass
117
118
119
# Whitespace
120
def t_WS(t):
    r' [ ]+ '
    # Whitespace only matters for indentation tracking: keep it at the
    # start of a line, and never inside parentheses.
    significant = t.lexer.at_line_start and t.lexer.paren_count == 0
    if significant:
        return t
    return None
124
125
# Don't generate newline tokens when inside of parenthesis, eg
126
# a = (1,
127
# 2, 3)
128
def t_newline(t):
    r'\n+'
    # One token may span several consecutive newlines; advance the line
    # counter by however many were consumed.
    t.lexer.lineno += len(t.value)
    t.type = "NEWLINE"
    # Newlines inside parentheses are not statement terminators.
    if t.lexer.paren_count != 0:
        return None
    return t
134
135
def t_LPAR(t):
    r'\('
    # Track nesting depth so WS / NEWLINE tokens can be suppressed
    # inside parenthesized expressions.
    t.lexer.paren_count = t.lexer.paren_count + 1
    return t
139
140
def t_RPAR(t):
    r'\)'
    # Underflow (more ')' than '(') is deliberately not checked here;
    # the parser reports that as a syntax error.
    t.lexer.paren_count = t.lexer.paren_count - 1
    return t
145
146
147
def t_error(t):
    """Lexer error hook: abort on the first unrecognizable character.

    The original body had an unreachable skip-and-continue path (a
    Python 2 print statement plus ``t.lexer.skip(1)``) after the raise;
    that dead code has been removed.
    """
    raise SyntaxError("Unknown symbol %r" % (t.value[0],))
151
152
## I implemented INDENT / DEDENT generation as a post-processing filter
153
154
# The original lex token stream contains WS and NEWLINE characters.
155
# WS will only occur before any other tokens on a line.
156
157
# I have three filters. One tags tokens by adding two attributes.
158
# "must_indent" is True if the token must be indented from the
159
# previous code. The other is "at_line_start" which is True for WS
160
# and the first non-WS/non-NEWLINE on a line. It flags the check so
161
# see if the new line has changed indentation level.
162
163
# Python's syntax has three INDENT states
164
# 0) no colon hence no need to indent
165
# 1) "if 1: go()" - simple statements have a COLON but no need for an indent
166
# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent
167
NO_INDENT = 0
168
MAY_INDENT = 1
169
MUST_INDENT = 2
170
171
# only care about whitespace at the start of a line
172
def track_tokens_filter(lexer, tokens):
    """First filter pass: tag each token with two attributes.

    ``at_line_start`` is True for WS tokens and for the first
    non-WS/non-NEWLINE token on a line.  ``must_indent`` is True only
    for the first real token after a COLON NEWLINE pair, i.e. the start
    of a required indented block.  The lexer's own ``at_line_start``
    attribute is kept up to date so t_WS can consult it.

    (Fix: dropped the unused local ``saw_colon``, which was assigned
    but never read.)
    """
    lexer.at_line_start = at_line_start = True
    indent = NO_INDENT
    for token in tokens:
        token.at_line_start = at_line_start

        if token.type == "COLON":
            at_line_start = False
            indent = MAY_INDENT          # "if x: stmt" needs no indent
            token.must_indent = False

        elif token.type == "NEWLINE":
            at_line_start = True
            if indent == MAY_INDENT:     # COLON then NEWLINE => block required
                indent = MUST_INDENT
            token.must_indent = False

        elif token.type == "WS":
            assert token.at_line_start == True
            at_line_start = True
            token.must_indent = False

        else:
            # A real token; only indent after COLON NEWLINE
            if indent == MUST_INDENT:
                token.must_indent = True
            else:
                token.must_indent = False
            at_line_start = False
            indent = NO_INDENT

        yield token
    lexer.at_line_start = at_line_start
206
207
def _new_token(type, lineno):
    """Manufacture a bare LexToken for the synthetic INDENT / DEDENT /
    ENDMARKER tokens that ply.lex never emits on its own."""
    tok = lex.LexToken()
    tok.type = type
    tok.value = None
    tok.lineno = lineno
    return tok
213
214
# Synthesize a DEDENT tag
215
def DEDENT(lineno):
    # Emitted by indentation_filter when a line closes one or more blocks.
    return _new_token("DEDENT", lineno)
217
218
# Synthesize an INDENT tag
219
def INDENT(lineno):
    # Emitted by indentation_filter when a must_indent token opens a block.
    return _new_token("INDENT", lineno)
221
222
223
# Track the indentation level and emit the right INDENT / DEDENT events.
224
def indentation_filter(tokens):
    """Second filter pass: consume the WS / NEWLINE bookkeeping tokens
    and emit synthetic INDENT / DEDENT tokens, Python-style.

    Expects tokens already tagged by track_tokens_filter with
    ``at_line_start`` and ``must_indent``.  Raises IndentationError for
    a missing, unexpected, or inconsistent indent.
    """
    # A stack of indentation levels; will never pop item 0
    levels = [0]
    token = None
    depth = 0
    prev_was_ws = False
    for token in tokens:
##        if 1:
##            print "Process", token,
##            if token.at_line_start:
##                print "at_line_start",
##            if token.must_indent:
##                print "must_indent",
##            print

        # WS only occurs at the start of the line
        # There may be WS followed by NEWLINE so
        # only track the depth here.  Don't indent/dedent
        # until there's something real.
        if token.type == "WS":
            assert depth == 0
            depth = len(token.value)
            prev_was_ws = True
            # WS tokens are never passed to the parser
            continue

        if token.type == "NEWLINE":
            depth = 0
            if prev_was_ws or token.at_line_start:
                # ignore blank lines
                continue
            # pass the other cases on through
            yield token
            continue

        # then it must be a real token (not WS, not NEWLINE)
        # which can affect the indentation level

        prev_was_ws = False
        if token.must_indent:
            # The current depth must be larger than the previous level
            if not (depth > levels[-1]):
                raise IndentationError("expected an indented block")

            levels.append(depth)
            yield INDENT(token.lineno)

        elif token.at_line_start:
            # Must be on the same level or one of the previous levels
            if depth == levels[-1]:
                # At the same level
                pass
            elif depth > levels[-1]:
                raise IndentationError("indentation increase but not in new block")
            else:
                # Back up; but only if it matches a previous level
                try:
                    i = levels.index(depth)
                except ValueError:
                    raise IndentationError("inconsistent indentation")
                # Emit one DEDENT per level popped off the stack.
                for _ in range(i+1, len(levels)):
                    yield DEDENT(token.lineno)
                    levels.pop()

        yield token

    ### Finished processing ###

    # Must dedent any remaining levels
    if len(levels) > 1:
        assert token is not None
        for _ in range(1, len(levels)):
            yield DEDENT(token.lineno)
297
298
299
# The top-level filter adds an ENDMARKER, if requested.
300
# Python's grammar uses it.
301
def filter(lexer, add_endmarker = True):
    """Top-level token filter: chain the two passes over the raw lexer
    stream and optionally append a final ENDMARKER token.

    NOTE: shadows the 'filter' builtin; kept under this name because
    IndentLexer.input calls it as such.
    """
    token = None
    tokens = iter(lexer.token, None)
    tokens = track_tokens_filter(lexer, tokens)
    for token in indentation_filter(tokens):
        yield token

    if add_endmarker:
        # Reuse the last real token's line number when one exists.
        lineno = 1
        if token is not None:
            lineno = token.lineno
        yield _new_token("ENDMARKER", lineno)
313
314
# Combine Ply and my filters into a new lexer
315
316
class IndentLexer(object):
    """Wrap a ply.lex lexer with the INDENT/DEDENT filters so the result
    can be handed straight to ply.yacc as its ``lexer`` argument."""
    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
        self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags)
        self.token_stream = None
    def input(self, s, add_endmarker=True):
        # Reset paren tracking for each new source string.
        self.lexer.paren_count = 0
        self.lexer.input(s)
        self.token_stream = filter(self.lexer, add_endmarker)
    def token(self):
        # yacc expects None at end-of-input rather than StopIteration.
        # (.next() is the Python 2 iterator protocol.)
        try:
            return self.token_stream.next()
        except StopIteration:
            return None
329
330
########## Parser (tokens -> AST) ######
331
332
# also part of Ply
333
#import yacc
334
335
# I use the Python AST
336
from compiler import ast
337
338
# Helper function
339
def Assign(left, right):
    """Build a compiler.ast node for the assignment ``left = right``.

    Supports a single Name target or a Tuple of Name targets; any other
    target (attribute, subscript, ...) raises SyntaxError.

    (Fix: removed the dead ``names = []`` that preceded the branch; it
    was immediately shadowed in the only path that uses ``names``.)
    """
    if isinstance(left, ast.Name):
        # Single assignment on left
        return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
    elif isinstance(left, ast.Tuple):
        # List of things - make sure they are Name nodes
        names = []
        for child in left.getChildren():
            if not isinstance(child, ast.Name):
                raise SyntaxError("that assignment not supported")
            names.append(child.name)
        ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
        return ast.Assign([ast.AssTuple(ass_list)], right)
    else:
        raise SyntaxError("Can't do that yet")
355
356
357
# The grammar comments come from Python's Grammar/Grammar file
358
359
## NB: compound_stmt in single_input is followed by extra NEWLINE!
360
# file_input: (NEWLINE | stmt)* ENDMARKER
361
def p_file_input_end(p):
    """file_input_end : file_input ENDMARKER"""
    # Wrap the accumulated statement list in a Stmt node; this is the
    # grammar's start symbol (see GardenSnakeParser).
    p[0] = ast.Stmt(p[1])
364
def p_file_input(p):
    """file_input : file_input NEWLINE
                  | file_input stmt
                  | NEWLINE
                  | stmt"""
    # NEWLINE tokens reach here as their matched text (t_newline keeps
    # t.value), so a string last symbol means a blank line to discard.
    # (basestring is Python 2.)
    if isinstance(p[len(p)-1], basestring):
        if len(p) == 3:
            p[0] = p[1]
        else:
            p[0] = [] # p == 2 --> only a blank line
    else:
        if len(p) == 3:
            p[0] = p[1] + p[2]
        else:
            p[0] = p[1]
379
380
381
# funcdef: [decorators] 'def' NAME parameters ':' suite
382
# ignoring decorators
383
def p_funcdef(p):
    "funcdef : DEF NAME parameters COLON suite"
    # ast.Function(decorators, name, argnames, defaults, flags, doc, code)
    # -- no decorators, defaults, flags or docstring are supported.
    p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5])
386
387
# parameters: '(' [varargslist] ')'
388
def p_parameters(p):
    """parameters : LPAR RPAR
                  | LPAR varargslist RPAR"""
    # An empty pair of parens means no formal parameters.
    p[0] = [] if len(p) == 3 else p[2]
395
396
397
# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) |
398
# highly simplified
399
def p_varargslist(p):
    """varargslist : varargslist COMMA NAME
                   | NAME"""
    # Accumulate parameter names into a flat list.
    # BUG FIX: the recursive case used ``p[1] + p[3]``, concatenating a
    # list with a bare NAME string -- a TypeError for any parameter list
    # of two or more names.  Wrap the new NAME in a list, matching the
    # pattern used by p_small_stmts and p_arglist.
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
406
407
# stmt: simple_stmt | compound_stmt
408
def p_stmt_simple(p):
    """stmt : simple_stmt"""
    # simple_stmt is a list
    p[0] = p[1]
412
413
def p_stmt_compound(p):
    """stmt : compound_stmt"""
    # Wrap the single compound-statement node so stmt is always a list.
    p[0] = [p[1]]
416
417
# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
418
def p_simple_stmt(p):
    """simple_stmt : small_stmts NEWLINE
                   | small_stmts SEMICOLON NEWLINE"""
    # The optional trailing semicolon and the newline are discarded.
    p[0] = p[1]
422
423
def p_small_stmts(p):
    """small_stmts : small_stmts SEMICOLON small_stmt
                   | small_stmt"""
    # Accumulate semicolon-separated small statements into a flat list.
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1] + [p[3]]
430
431
# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
432
# import_stmt | global_stmt | exec_stmt | assert_stmt
433
def p_small_stmt(p):
    """small_stmt : flow_stmt
                  | expr_stmt"""
    # Either alternative already produced an AST node; pass it up.
    p[0] = p[1]
437
438
# expr_stmt: testlist (augassign (yield_expr|testlist) |
439
# ('=' (yield_expr|testlist))*)
440
# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
441
# '<<=' | '>>=' | '**=' | '//=')
442
def p_expr_stmt(p):
    """expr_stmt : testlist ASSIGN testlist
                 | testlist """
    # A bare expression is wrapped in Discard (evaluate, drop result);
    # an assignment is delegated to the Assign() helper above.
    if len(p) == 2:
        # a list of expressions
        p[0] = ast.Discard(p[1])
    else:
        p[0] = Assign(p[1], p[3])
450
451
def p_flow_stmt(p):
    "flow_stmt : return_stmt"
    # 'return' is the only flow statement supported.
    p[0] = p[1]
454
455
# return_stmt: 'return' [testlist]
456
def p_return_stmt(p):
    "return_stmt : RETURN testlist"
    # NOTE: unlike Python, the expression is mandatory here -- a bare
    # 'return' is a syntax error in this grammar.
    p[0] = ast.Return(p[2])
459
460
461
def p_compound_stmt(p):
    """compound_stmt : if_stmt
                     | funcdef"""
    # A compound statement is a single node; p_stmt_compound wraps it
    # into a list.
    p[0] = p[1]
465
466
def p_if_stmt(p):
    'if_stmt : IF test COLON suite'
    # ast.If(tests, else_): one (condition, body) pair, no elif/else.
    p[0] = ast.If([(p[2], p[4])], None)
469
470
def p_suite(p):
    """suite : simple_stmt
             | NEWLINE INDENT stmts DEDENT"""
    # Either an inline body ("if x: stmt") or an indented block; both
    # become a Stmt node around the statement list.
    if len(p) == 2:
        p[0] = ast.Stmt(p[1])
    else:
        p[0] = ast.Stmt(p[3])
477
478
479
def p_stmts(p):
    """stmts : stmts stmt
             | stmt"""
    # Both 'stmts' and 'stmt' are lists, so extending is concatenation.
    p[0] = p[1] + p[2] if len(p) == 3 else p[1]
486
487
## No using Python's approach because Ply supports precedence
488
489
# comparison: expr (comp_op expr)*
490
# arith_expr: term (('+'|'-') term)*
491
# term: factor (('*'|'/'|'%'|'//') factor)*
492
# factor: ('+'|'-'|'~') factor | power
493
# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
494
495
def make_lt_compare(pair):
    """Build an ast.Compare for ``left < right`` from a single
    ``(left, right)`` tuple, as dispatched via binary_ops.

    (Rewritten from the Python-2-only tuple-unpacking parameter syntax,
    removed by PEP 3113; callers still pass one tuple.)
    """
    left, right = pair
    return ast.Compare(left, [('<', right),])
497
def make_gt_compare(pair):
    """Build an ast.Compare for ``left > right`` from a single
    ``(left, right)`` tuple (see make_lt_compare; tuple-unpacking
    parameters were removed by PEP 3113)."""
    left, right = pair
    return ast.Compare(left, [('>', right),])
499
def make_eq_compare(pair):
    """Build an ast.Compare for ``left == right`` from a single
    ``(left, right)`` tuple (see make_lt_compare; tuple-unpacking
    parameters were removed by PEP 3113)."""
    left, right = pair
    return ast.Compare(left, [('==', right),])
501
502
503
# Dispatch tables used by p_comparison: each binary entry is called with
# one (left, right) tuple, each unary entry with a single node.
binary_ops = {
    "+": ast.Add,
    "-": ast.Sub,
    "*": ast.Mul,
    "/": ast.Div,
    "<": make_lt_compare,
    ">": make_gt_compare,
    "==": make_eq_compare,
}
unary_ops = {
    "+": ast.UnaryAdd,
    "-": ast.UnarySub,
}
# Listed lowest to highest precedence; all operators left-associative.
precedence = (
    ("left", "EQ", "GT", "LT"),
    ("left", "PLUS", "MINUS"),
    ("left", "MULT", "DIV"),
)
521
522
def p_comparison(p):
    """comparison : comparison PLUS comparison
                  | comparison MINUS comparison
                  | comparison MULT comparison
                  | comparison DIV comparison
                  | comparison LT comparison
                  | comparison EQ comparison
                  | comparison GT comparison
                  | PLUS comparison
                  | MINUS comparison
                  | power"""
    # This deliberately flat, ambiguous rule is disambiguated by the
    # 'precedence' table above rather than by grammar structure.
    if len(p) == 4:
        # binary: dispatch on the operator text
        p[0] = binary_ops[p[2]]((p[1], p[3]))
    elif len(p) == 3:
        # unary +/-
        p[0] = unary_ops[p[1]](p[2])
    else:
        p[0] = p[1]
539
540
# power: atom trailer* ['**' factor]
541
# trailers enables function calls. I only allow one level of calls
542
# so this is 'trailer'
543
def p_power(p):
    """power : atom
             | atom trailer"""
    if len(p) == 2:
        p[0] = p[1]
    else:
        # trailer is a ("CALL", arglist) tuple from p_trailer.
        if p[2][0] == "CALL":
            # ast.CallFunc(node, args, star_args, dstar_args)
            p[0] = ast.CallFunc(p[1], p[2][1], None, None)
        else:
            raise AssertionError("not implemented")
553
554
def p_atom_name(p):
    """atom : NAME"""
    # Bare identifier -> Name node (resolved at run time).
    p[0] = ast.Name(p[1])
557
558
def p_atom_number(p):
    """atom : NUMBER
            | STRING"""
    # Literal values arrive pre-converted by the lexer (Decimal / str).
    p[0] = ast.Const(p[1])
562
563
def p_atom_tuple(p):
    """atom : LPAR testlist RPAR"""
    # Parentheses only group; tuple construction is handled inside
    # p_testlist itself.
    p[0] = p[2]
566
567
# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
568
def p_trailer(p):
    "trailer : LPAR arglist RPAR"
    # Tagged tuple consumed by p_power; only calls are supported, not
    # subscripts or attribute access.
    p[0] = ("CALL", p[2])
571
572
# testlist: test (',' test)* [',']
573
# Contains shift/reduce error
574
def p_testlist(p):
    """testlist : testlist_multi COMMA
                | testlist_multi """
    if len(p) == 2:
        p[0] = p[1]
    else:
        # May need to promote singleton to tuple
        if isinstance(p[1], list):
            p[0] = p[1]
        else:
            p[0] = [p[1]]
    # A list at this point means multiple tests (or a trailing comma):
    # package it as a Tuple node.
    if isinstance(p[0], list):
        p[0] = ast.Tuple(p[0])
588
589
def p_testlist_multi(p):
    """testlist_multi : testlist_multi COMMA test
                      | test"""
    if len(p) == 2:
        # Single test: pass it through unchanged (not yet a list).
        p[0] = p[1]
        return
    if isinstance(p[1], list):
        # Already accumulating: append the new test.
        p[0] = p[1] + [p[3]]
    else:
        # Exactly two tests so far: start the list.
        p[0] = [p[1], p[3]]
601
602
603
# test: or_test ['if' or_test 'else' test] | lambdef
604
# as I don't support 'and', 'or', and 'not' this works down to 'comparison'
605
def p_test(p):
    "test : comparison"
    # No boolean operators supported, so a test is just a comparison.
    p[0] = p[1]
608
609
610
611
# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
612
# XXX INCOMPLETE: this doesn't allow the trailing comma
613
def p_arglist(p):
    """arglist : arglist COMMA argument
               | argument"""
    # Collect call arguments left-to-right into a flat list.
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1] + [p[3]]
620
621
# argument: test [gen_for] | test '=' test # Really [keyword '='] test
622
def p_argument(p):
    "argument : test"
    # No keyword arguments or generator expressions.
    p[0] = p[1]
625
626
def p_error(p):
    #print "Error!", repr(p)
    # Abort on the first parse error; p is the offending token.
    raise SyntaxError(p)
629
630
631
class GardenSnakeParser(object):
    """Couple the indentation-aware lexer with the yacc parser and
    produce a compiler.ast Module for a source string."""
    def __init__(self, lexer = None):
        if lexer is None:
            lexer = IndentLexer()
        self.lexer = lexer
        # The grammar's start symbol is file_input_end (see
        # p_file_input_end above).
        self.parser = yacc.yacc(start="file_input_end")

    def parse(self, code):
        # Feed the source through our filtered lexer, then wrap the
        # resulting Stmt in a Module node (doc=None).
        self.lexer.input(code)
        result = self.parser.parse(lexer = self.lexer)
        return ast.Module(None, result)
642
643
644
###### Code generation ######
645
646
from compiler import misc, syntax, pycodegen
647
648
class GardenSnakeCompiler(object):
    """Parse, check and code-generate GardenSnake source into a Python
    code object via the (Python 2) 'compiler' package."""
    def __init__(self):
        self.parser = GardenSnakeParser()
    def compile(self, code, filename="<string>"):
        tree = self.parser.parse(code)
        #print tree
        # Annotate every node with the filename (used in tracebacks).
        misc.set_filename(filename, tree)
        # Run the compiler package's tree checks before generating code.
        syntax.check(tree)
        gen = pycodegen.ModuleCodeGenerator(tree)
        code = gen.getCode()
        return code
659
660
####### Test code #######
661
662
compile = GardenSnakeCompiler().compile
663
664
code = r"""
665
666
print('LET\'S TRY THIS \\OUT')
667
668
#Comment here
669
def x(a):
670
print('called with',a)
671
if a == 1:
672
return 2
673
if a*2 > 10: return 999 / 4
674
# Another comment here
675
676
return a+2*3
677
678
ints = (1, 2,
679
3, 4,
680
5)
681
print('mutiline-expression', ints)
682
683
t = 4+1/3*2+6*(9-5+1)
684
print('predence test; should be 34+2/3:', t, t==(34+2/3))
685
686
print('numbers', 1,2,3,4,5)
687
if 1:
688
8
689
a=9
690
print(x(a))
691
692
print(x(1))
693
print(x(2))
694
print(x(8),'3')
695
print('this is decimal', 1/5)
696
print('BIG DECIMAL', 1.234567891234567e12345)
697
698
"""
699
700
# Set up the GardenSnake run-time environment
701
def print_(*args):
    # Prefix output with "-->" so GardenSnake's print output is
    # distinguishable from the host interpreter's.
    print "-->", " ".join(map(str,args))

# GardenSnake has no print statement; expose a 'print' function in this
# module's globals so the compiled code can call it.
globals()["print"] = print_

compiled_code = compile(code)

# Python 2 exec statement: run the generated module code object in this
# module's globals.
exec compiled_code in globals()
print "Done"
710
711