CoCalc -- GardenSnake.py

GitHub Repository: emscripten-core/emscripten
Path: blob/main/third_party/ply/example/GardenSnake/GardenSnake.py
⁷⁰⁸⁷ views
1
# GardenSnake - a parser generator demonstration program
2
#
3
# This implements a modified version of a subset of Python:
4
#  - only 'def', 'return' and 'if' statements
5
#  - 'if' only has 'then' clause (no elif nor else)
6
#  - single-quoted strings only, content in raw format
7
#  - numbers are decimal.Decimal instances (not integers or floats)
8
#  - no print statment; use the built-in 'print' function
9
#  - only < > == + - / * implemented (and unary + -)
10
#  - assignment and tuple assignment work
11
#  - no generators of any sort
12
#  - no ... well, no quite a lot
13

14
# Why?  I'm thinking about a new indentation-based configuration
15
# language for a project and wanted to figure out how to do it.  Once
16
# I got that working I needed a way to test it out.  My original AST
17
# was dumb so I decided to target Python's AST and compile it into
18
# Python code.  Plus, it's pretty cool that it only took a day or so
19
# from sitting down with Ply to having working code.
20

21
# This uses David Beazley's Ply from http://www.dabeaz.com/ply/
22

23
# This work is hereby released into the Public Domain. To view a copy of
24
# the public domain dedication, visit
25
# http://creativecommons.org/licenses/publicdomain/ or send a letter to
26
# Creative Commons, 543 Howard Street, 5th Floor, San Francisco,
27
# California, 94105, USA.
28
#
29
# Portions of this work are derived from Python's Grammar definition
30
# and may be covered under the Python copyright and license
31
#
32
#          Andrew Dalke / Dalke Scientific Software, LLC
33
#             30 August 2006 / Cape Town, South Africa
34

35
# Changelog:
36
#  30 August - added link to CC license; removed the "swapcase" encoding
37

38
# Modifications for inclusion in PLY distribution
39
import sys
40
sys.path.insert(0,"../..")
41
from ply import *
42

43
##### Lexer ######
44
#import lex
45
import decimal
46

47
tokens = (
48
    'DEF',
49
    'IF',
50
    'NAME',
51
    'NUMBER',  # Python decimals
52
    'STRING',  # single quoted strings only; syntax of raw strings
53
    'LPAR',
54
    'RPAR',
55
    'COLON',
56
    'EQ',
57
    'ASSIGN',
58
    'LT',
59
    'GT',
60
    'PLUS',
61
    'MINUS',
62
    'MULT',
63
    'DIV',
64
    'RETURN',
65
    'WS',
66
    'NEWLINE',
67
    'COMMA',
68
    'SEMICOLON',
69
    'INDENT',
70
    'DEDENT',
71
    'ENDMARKER',
72
    )
73

74
#t_NUMBER = r'\d+'
75
# taken from decmial.py but without the leading sign
76
def t_NUMBER(t):
77
    r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
78
    t.value = decimal.Decimal(t.value)
79
    return t
80

81
def t_STRING(t):
82
    r"'([^\\']+|\\'|\\\\)*'"  # I think this is right ...
83
    t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun
84
    return t
85

86
t_COLON = r':'
87
t_EQ = r'=='
88
t_ASSIGN = r'='
89
t_LT = r'<'
90
t_GT = r'>'
91
t_PLUS = r'\+'
92
t_MINUS = r'-'
93
t_MULT = r'\*'
94
t_DIV = r'/'
95
t_COMMA = r','
96
t_SEMICOLON = r';'
97

98
# Ply nicely documented how to do this.
99

100
RESERVED = {
101
  "def": "DEF",
102
  "if": "IF",
103
  "return": "RETURN",
104
  }
105

106
def t_NAME(t):
107
    r'[a-zA-Z_][a-zA-Z0-9_]*'
108
    t.type = RESERVED.get(t.value, "NAME")
109
    return t
110

111
# Putting this before t_WS let it consume lines with only comments in
112
# them so the latter code never sees the WS part.  Not consuming the
113
# newline.  Needed for "if 1: #comment"
114
def t_comment(t):
115
    r"[ ]*\043[^\n]*"  # \043 is '#'
116
    pass
117

118

119
# Whitespace
120
def t_WS(t):
121
    r' [ ]+ '
122
    if t.lexer.at_line_start and t.lexer.paren_count == 0:
123
        return t
124

125
# Don't generate newline tokens when inside of parenthesis, eg
126
#   a = (1,
127
#        2, 3)
128
def t_newline(t):
129
    r'\n+'
130
    t.lexer.lineno += len(t.value)
131
    t.type = "NEWLINE"
132
    if t.lexer.paren_count == 0:
133
        return t
134

135
def t_LPAR(t):
136
    r'\('
137
    t.lexer.paren_count += 1
138
    return t
139

140
def t_RPAR(t):
141
    r'\)'
142
    # check for underflow?  should be the job of the parser
143
    t.lexer.paren_count -= 1
144
    return t
145

146

147
def t_error(t):
148
    raise SyntaxError("Unknown symbol %r" % (t.value[0],))
149
    print "Skipping", repr(t.value[0])
150
    t.lexer.skip(1)
151

152
## I implemented INDENT / DEDENT generation as a post-processing filter
153

154
# The original lex token stream contains WS and NEWLINE characters.
155
# WS will only occur before any other tokens on a line.
156

157
# I have three filters.  One tags tokens by adding two attributes.
158
# "must_indent" is True if the token must be indented from the
159
# previous code.  The other is "at_line_start" which is True for WS
160
# and the first non-WS/non-NEWLINE on a line.  It flags the check so
161
# see if the new line has changed indication level.
162

163
# Python's syntax has three INDENT states
164
#  0) no colon hence no need to indent
165
#  1) "if 1: go()" - simple statements have a COLON but no need for an indent
166
#  2) "if 1:\n  go()" - complex statements have a COLON NEWLINE and must indent
167
NO_INDENT = 0
168
MAY_INDENT = 1
169
MUST_INDENT = 2
170

171
# only care about whitespace at the start of a line
172
def track_tokens_filter(lexer, tokens):
173
    lexer.at_line_start = at_line_start = True
174
    indent = NO_INDENT
175
    saw_colon = False
176
    for token in tokens:
177
        token.at_line_start = at_line_start
178

179
        if token.type == "COLON":
180
            at_line_start = False
181
            indent = MAY_INDENT
182
            token.must_indent = False
183
            
184
        elif token.type == "NEWLINE":
185
            at_line_start = True
186
            if indent == MAY_INDENT:
187
                indent = MUST_INDENT
188
            token.must_indent = False
189

190
        elif token.type == "WS":
191
            assert token.at_line_start == True
192
            at_line_start = True
193
            token.must_indent = False
194

195
        else:
196
            # A real token; only indent after COLON NEWLINE
197
            if indent == MUST_INDENT:
198
                token.must_indent = True
199
            else:
200
                token.must_indent = False
201
            at_line_start = False
202
            indent = NO_INDENT
203

204
        yield token
205
        lexer.at_line_start = at_line_start
206

207
def _new_token(type, lineno):
208
    tok = lex.LexToken()
209
    tok.type = type
210
    tok.value = None
211
    tok.lineno = lineno
212
    return tok
213

214
# Synthesize a DEDENT tag
215
def DEDENT(lineno):
216
    return _new_token("DEDENT", lineno)
217

218
# Synthesize an INDENT tag
219
def INDENT(lineno):
220
    return _new_token("INDENT", lineno)
221

222

223
# Track the indentation level and emit the right INDENT / DEDENT events.
224
def indentation_filter(tokens):
225
    # A stack of indentation levels; will never pop item 0
226
    levels = [0]
227
    token = None
228
    depth = 0
229
    prev_was_ws = False
230
    for token in tokens:
231
##        if 1:
232
##            print "Process", token,
233
##            if token.at_line_start:
234
##                print "at_line_start",
235
##            if token.must_indent:
236
##                print "must_indent",
237
##            print
238
                
239
        # WS only occurs at the start of the line
240
        # There may be WS followed by NEWLINE so
241
        # only track the depth here.  Don't indent/dedent
242
        # until there's something real.
243
        if token.type == "WS":
244
            assert depth == 0
245
            depth = len(token.value)
246
            prev_was_ws = True
247
            # WS tokens are never passed to the parser
248
            continue
249

250
        if token.type == "NEWLINE":
251
            depth = 0
252
            if prev_was_ws or token.at_line_start:
253
                # ignore blank lines
254
                continue
255
            # pass the other cases on through
256
            yield token
257
            continue
258

259
        # then it must be a real token (not WS, not NEWLINE)
260
        # which can affect the indentation level
261

262
        prev_was_ws = False
263
        if token.must_indent:
264
            # The current depth must be larger than the previous level
265
            if not (depth > levels[-1]):
266
                raise IndentationError("expected an indented block")
267

268
            levels.append(depth)
269
            yield INDENT(token.lineno)
270

271
        elif token.at_line_start:
272
            # Must be on the same level or one of the previous levels
273
            if depth == levels[-1]:
274
                # At the same level
275
                pass
276
            elif depth > levels[-1]:
277
                raise IndentationError("indentation increase but not in new block")
278
            else:
279
                # Back up; but only if it matches a previous level
280
                try:
281
                    i = levels.index(depth)
282
                except ValueError:
283
                    raise IndentationError("inconsistent indentation")
284
                for _ in range(i+1, len(levels)):
285
                    yield DEDENT(token.lineno)
286
                    levels.pop()
287

288
        yield token
289

290
    ### Finished processing ###
291

292
    # Must dedent any remaining levels
293
    if len(levels) > 1:
294
        assert token is not None
295
        for _ in range(1, len(levels)):
296
            yield DEDENT(token.lineno)
297
    
298

299
# The top-level filter adds an ENDMARKER, if requested.
300
# Python's grammar uses it.
301
def filter(lexer, add_endmarker = True):
302
    token = None
303
    tokens = iter(lexer.token, None)
304
    tokens = track_tokens_filter(lexer, tokens)
305
    for token in indentation_filter(tokens):
306
        yield token
307

308
    if add_endmarker:
309
        lineno = 1
310
        if token is not None:
311
            lineno = token.lineno
312
        yield _new_token("ENDMARKER", lineno)
313

314
# Combine Ply and my filters into a new lexer
315

316
class IndentLexer(object):
317
    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
318
        self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags)
319
        self.token_stream = None
320
    def input(self, s, add_endmarker=True):
321
        self.lexer.paren_count = 0
322
        self.lexer.input(s)
323
        self.token_stream = filter(self.lexer, add_endmarker)
324
    def token(self):
325
        try:
326
            return self.token_stream.next()
327
        except StopIteration:
328
            return None
329

330
##########   Parser (tokens -> AST) ######
331

332
# also part of Ply
333
#import yacc
334

335
# I use the Python AST
336
from compiler import ast
337

338
# Helper function
339
def Assign(left, right):
340
    names = []
341
    if isinstance(left, ast.Name):
342
        # Single assignment on left
343
        return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
344
    elif isinstance(left, ast.Tuple):
345
        # List of things - make sure they are Name nodes
346
        names = []
347
        for child in left.getChildren():
348
            if not isinstance(child, ast.Name):
349
                raise SyntaxError("that assignment not supported")
350
            names.append(child.name)
351
        ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
352
        return ast.Assign([ast.AssTuple(ass_list)], right)
353
    else:
354
        raise SyntaxError("Can't do that yet")
355

356

357
# The grammar comments come from Python's Grammar/Grammar file
358

359
## NB: compound_stmt in single_input is followed by extra NEWLINE!
360
# file_input: (NEWLINE | stmt)* ENDMARKER
361
def p_file_input_end(p):
362
    """file_input_end : file_input ENDMARKER"""
363
    p[0] = ast.Stmt(p[1])
364
def p_file_input(p):
365
    """file_input : file_input NEWLINE
366
                  | file_input stmt
367
                  | NEWLINE
368
                  | stmt"""
369
    if isinstance(p[len(p)-1], basestring):
370
        if len(p) == 3:
371
            p[0] = p[1]
372
        else:
373
            p[0] = [] # p == 2 --> only a blank line
374
    else:
375
        if len(p) == 3:
376
            p[0] = p[1] + p[2]
377
        else:
378
            p[0] = p[1]
379
            
380

381
# funcdef: [decorators] 'def' NAME parameters ':' suite
382
# ignoring decorators
383
def p_funcdef(p):
384
    "funcdef : DEF NAME parameters COLON suite"
385
    p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5])
386
    
387
# parameters: '(' [varargslist] ')'
388
def p_parameters(p):
389
    """parameters : LPAR RPAR
390
                  | LPAR varargslist RPAR"""
391
    if len(p) == 3:
392
        p[0] = []
393
    else:
394
        p[0] = p[2]
395
    
396

397
# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | 
398
# highly simplified
399
def p_varargslist(p):
400
    """varargslist : varargslist COMMA NAME
401
                   | NAME"""
402
    if len(p) == 4:
403
        p[0] = p[1] + p[3]
404
    else:
405
        p[0] = [p[1]]
406

407
# stmt: simple_stmt | compound_stmt
408
def p_stmt_simple(p):
409
    """stmt : simple_stmt"""
410
    # simple_stmt is a list
411
    p[0] = p[1]
412
    
413
def p_stmt_compound(p):
414
    """stmt : compound_stmt"""
415
    p[0] = [p[1]]
416

417
# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
418
def p_simple_stmt(p):
419
    """simple_stmt : small_stmts NEWLINE
420
                   | small_stmts SEMICOLON NEWLINE"""
421
    p[0] = p[1]
422

423
def p_small_stmts(p):
424
    """small_stmts : small_stmts SEMICOLON small_stmt
425
                   | small_stmt"""
426
    if len(p) == 4:
427
        p[0] = p[1] + [p[3]]
428
    else:
429
        p[0] = [p[1]]
430

431
# small_stmt: expr_stmt | print_stmt  | del_stmt | pass_stmt | flow_stmt |
432
#    import_stmt | global_stmt | exec_stmt | assert_stmt
433
def p_small_stmt(p):
434
    """small_stmt : flow_stmt
435
                  | expr_stmt"""
436
    p[0] = p[1]
437

438
# expr_stmt: testlist (augassign (yield_expr|testlist) |
439
#                      ('=' (yield_expr|testlist))*)
440
# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
441
#             '<<=' | '>>=' | '**=' | '//=')
442
def p_expr_stmt(p):
443
    """expr_stmt : testlist ASSIGN testlist
444
                 | testlist """
445
    if len(p) == 2:
446
        # a list of expressions
447
        p[0] = ast.Discard(p[1])
448
    else:
449
        p[0] = Assign(p[1], p[3])
450

451
def p_flow_stmt(p):
452
    "flow_stmt : return_stmt"
453
    p[0] = p[1]
454

455
# return_stmt: 'return' [testlist]
456
def p_return_stmt(p):
457
    "return_stmt : RETURN testlist"
458
    p[0] = ast.Return(p[2])
459

460

461
def p_compound_stmt(p):
462
    """compound_stmt : if_stmt
463
                     | funcdef"""
464
    p[0] = p[1]
465

466
def p_if_stmt(p):
467
    'if_stmt : IF test COLON suite'
468
    p[0] = ast.If([(p[2], p[4])], None)
469

470
def p_suite(p):
471
    """suite : simple_stmt
472
             | NEWLINE INDENT stmts DEDENT"""
473
    if len(p) == 2:
474
        p[0] = ast.Stmt(p[1])
475
    else:
476
        p[0] = ast.Stmt(p[3])
477
    
478

479
def p_stmts(p):
480
    """stmts : stmts stmt
481
             | stmt"""
482
    if len(p) == 3:
483
        p[0] = p[1] + p[2]
484
    else:
485
        p[0] = p[1]
486

487
## No using Python's approach because Ply supports precedence
488

489
# comparison: expr (comp_op expr)*
490
# arith_expr: term (('+'|'-') term)*
491
# term: factor (('*'|'/'|'%'|'//') factor)*
492
# factor: ('+'|'-'|'~') factor | power
493
# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
494

495
def make_lt_compare((left, right)):
496
    return ast.Compare(left, [('<', right),])
497
def make_gt_compare((left, right)):
498
    return ast.Compare(left, [('>', right),])
499
def make_eq_compare((left, right)):
500
    return ast.Compare(left, [('==', right),])
501

502

503
binary_ops = {
504
    "+": ast.Add,
505
    "-": ast.Sub,
506
    "*": ast.Mul,
507
    "/": ast.Div,
508
    "<": make_lt_compare,
509
    ">": make_gt_compare,
510
    "==": make_eq_compare,
511
}
512
unary_ops = {
513
    "+": ast.UnaryAdd,
514
    "-": ast.UnarySub,
515
    }
516
precedence = (
517
    ("left", "EQ", "GT", "LT"),
518
    ("left", "PLUS", "MINUS"),
519
    ("left", "MULT", "DIV"),
520
    )
521

522
def p_comparison(p):
523
    """comparison : comparison PLUS comparison
524
                  | comparison MINUS comparison
525
                  | comparison MULT comparison
526
                  | comparison DIV comparison
527
                  | comparison LT comparison
528
                  | comparison EQ comparison
529
                  | comparison GT comparison
530
                  | PLUS comparison
531
                  | MINUS comparison
532
                  | power"""
533
    if len(p) == 4:
534
        p[0] = binary_ops[p[2]]((p[1], p[3]))
535
    elif len(p) == 3:
536
        p[0] = unary_ops[p[1]](p[2])
537
    else:
538
        p[0] = p[1]
539
                  
540
# power: atom trailer* ['**' factor]
541
# trailers enables function calls.  I only allow one level of calls
542
# so this is 'trailer'
543
def p_power(p):
544
    """power : atom
545
             | atom trailer"""
546
    if len(p) == 2:
547
        p[0] = p[1]
548
    else:
549
        if p[2][0] == "CALL":
550
            p[0] = ast.CallFunc(p[1], p[2][1], None, None)
551
        else:
552
            raise AssertionError("not implemented")
553

554
def p_atom_name(p):
555
    """atom : NAME"""
556
    p[0] = ast.Name(p[1])
557

558
def p_atom_number(p):
559
    """atom : NUMBER
560
            | STRING"""
561
    p[0] = ast.Const(p[1])
562

563
def p_atom_tuple(p):
564
    """atom : LPAR testlist RPAR"""
565
    p[0] = p[2]
566

567
# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
568
def p_trailer(p):
569
    "trailer : LPAR arglist RPAR"
570
    p[0] = ("CALL", p[2])
571

572
# testlist: test (',' test)* [',']
573
# Contains shift/reduce error
574
def p_testlist(p):
575
    """testlist : testlist_multi COMMA
576
                | testlist_multi """
577
    if len(p) == 2:
578
        p[0] = p[1]
579
    else:
580
        # May need to promote singleton to tuple
581
        if isinstance(p[1], list):
582
            p[0] = p[1]
583
        else:
584
            p[0] = [p[1]]
585
    # Convert into a tuple?
586
    if isinstance(p[0], list):
587
        p[0] = ast.Tuple(p[0])
588

589
def p_testlist_multi(p):
590
    """testlist_multi : testlist_multi COMMA test
591
                      | test"""
592
    if len(p) == 2:
593
        # singleton
594
        p[0] = p[1]
595
    else:
596
        if isinstance(p[1], list):
597
            p[0] = p[1] + [p[3]]
598
        else:
599
            # singleton -> tuple
600
            p[0] = [p[1], p[3]]
601

602

603
# test: or_test ['if' or_test 'else' test] | lambdef
604
#  as I don't support 'and', 'or', and 'not' this works down to 'comparison'
605
def p_test(p):
606
    "test : comparison"
607
    p[0] = p[1]
608
    
609

610

611
# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
612
# XXX INCOMPLETE: this doesn't allow the trailing comma
613
def p_arglist(p):
614
    """arglist : arglist COMMA argument
615
               | argument"""
616
    if len(p) == 4:
617
        p[0] = p[1] + [p[3]]
618
    else:
619
        p[0] = [p[1]]
620

621
# argument: test [gen_for] | test '=' test  # Really [keyword '='] test
622
def p_argument(p):
623
    "argument : test"
624
    p[0] = p[1]
625

626
def p_error(p):
627
    #print "Error!", repr(p)
628
    raise SyntaxError(p)
629

630

631
class GardenSnakeParser(object):
632
    def __init__(self, lexer = None):
633
        if lexer is None:
634
            lexer = IndentLexer()
635
        self.lexer = lexer
636
        self.parser = yacc.yacc(start="file_input_end")
637

638
    def parse(self, code):
639
        self.lexer.input(code)
640
        result = self.parser.parse(lexer = self.lexer)
641
        return ast.Module(None, result)
642

643

644
###### Code generation ######
645
    
646
from compiler import misc, syntax, pycodegen
647

648
class GardenSnakeCompiler(object):
649
    def __init__(self):
650
        self.parser = GardenSnakeParser()
651
    def compile(self, code, filename="<string>"):
652
        tree = self.parser.parse(code)
653
        #print  tree
654
        misc.set_filename(filename, tree)
655
        syntax.check(tree)
656
        gen = pycodegen.ModuleCodeGenerator(tree)
657
        code = gen.getCode()
658
        return code
659

660
####### Test code #######
661
    
662
compile = GardenSnakeCompiler().compile
663

664
code = r"""
665

666
print('LET\'S TRY THIS \\OUT')
667
  
668
#Comment here
669
def x(a):
670
    print('called with',a)
671
    if a == 1:
672
        return 2
673
    if a*2 > 10: return 999 / 4
674
        # Another comment here
675

676
    return a+2*3
677

678
ints = (1, 2,
679
   3, 4,
680
5)
681
print('mutiline-expression', ints)
682

683
t = 4+1/3*2+6*(9-5+1)
684
print('predence test; should be 34+2/3:', t, t==(34+2/3))
685

686
print('numbers', 1,2,3,4,5)
687
if 1:
688
 8
689
 a=9
690
 print(x(a))
691

692
print(x(1))
693
print(x(2))
694
print(x(8),'3')
695
print('this is decimal', 1/5)
696
print('BIG DECIMAL', 1.234567891234567e12345)
697

698
"""
699

700
# Set up the GardenSnake run-time environment
701
def print_(*args):
702
    print "-->", " ".join(map(str,args))
703

704
globals()["print"] = print_
705

706
compiled_code = compile(code)
707

708
exec compiled_code in globals()
709
print "Done"
710

711
Product

Resources

Company