Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/cases_generator/parser.py
12 views
1
"""Parser for bytecodes.inst."""
2
3
from dataclasses import dataclass, field
4
from typing import NamedTuple, Callable, TypeVar, Literal
5
6
import lexer as lx
7
from plexer import PLexer
8
9
10
# Type variable for the parser instance a grammar method receives (bound to Parser).
P = TypeVar("P", bound="Parser")
11
# Type variable for the node a grammar method produces (bound to Node).
N = TypeVar("N", bound="Node")
12
13
14
def contextual(func: Callable[[P], N | None]) -> Callable[[P], N | None]:
    """Decorate a grammar method so it backtracks on failure and tags its result.

    The wrapper records the parser position before calling `func`.

    * If `func` returns None, the position is restored and None is returned.
    * Otherwise the returned node's `context` attribute is set to a `Context`
      built from the start position, the end position and the parser itself,
      and the node is returned.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import functools

    @functools.wraps(func)  # Preserve the grammar method's name and docstring.
    def contextual_wrapper(self: P) -> N | None:
        begin = self.getpos()
        res = func(self)
        if res is None:
            # No match: undo whatever the grammar method consumed.
            self.setpos(begin)
            return None
        end = self.getpos()
        res.context = Context(begin, end, self)
        return res

    return contextual_wrapper
28
29
30
class Context(NamedTuple):
    """A pair of token positions plus the lexer whose token list they index."""

    begin: int
    end: int
    owner: PLexer

    def __repr__(self):
        filename = self.owner.filename
        return "<{}: {}-{}>".format(filename, self.begin, self.end)
37
38
39
@dataclass
class Node:
    """Base class for parse-tree nodes.

    A node may carry a `context` locating it in its owner's token list; the
    `tokens` and `text` accessors are derived from that context and yield
    empty results when no context is set.
    """

    # Not an __init__ argument and ignored by ==; assigned after construction.
    context: Context | None = field(init=False, compare=False, default=None)

    @property
    def text(self) -> str:
        """Source text of this node with no dedent applied."""
        return self.to_text()

    def to_text(self, dedent: int = 0) -> str:
        """Render this node's tokens via `lx.to_text`, or "" without a context."""
        if self.context:
            return lx.to_text(self.tokens, dedent)
        return ""

    @property
    def tokens(self) -> list[lx.Token]:
        """Slice of the owner's tokens from `begin` up to (not including) `end`."""
        span = self.context
        if span:
            return span.owner.tokens[span.begin : span.end]
        return []
62
63
64
@dataclass
class Block(Node):
    """A code block; it has no fields of its own, only the inherited context
    (which locates the block's tokens)."""
68
69
70
@dataclass
class StackEffect(Node):
    """A named stack item, optionally annotated with a type, condition or size.

    Note: size cannot be combined with type or cond.
    """

    name: str
    type: str = ""  # Optional `:type`
    cond: str = ""  # Optional `if (cond)`
    size: str = ""  # Optional `[size]`
77
78
79
@dataclass
class Expression(Node):
    """An expression kept as text in the `size` field."""

    size: str
82
83
84
@dataclass
class CacheEffect(Node):
    """A cache effect: a name paired with an integer size."""

    name: str
    size: int
88
89
90
@dataclass
class OpName(Node):
    """A bare op name."""

    name: str
93
94
95
# An instruction input is either a stack effect or a cache effect.
InputEffect = StackEffect | CacheEffect
96
# An instruction output is always a stack effect.
OutputEffect = StackEffect
97
# A macro component is either an op name or a cache effect.
UOp = OpName | CacheEffect
98
99
100
@dataclass
class InstHeader(Node):
    """The header of an `inst(...)` / `op(...)` definition, without its block."""

    override: bool  # True when the `override` token was present
    register: bool  # True when the `register` token was present
    kind: Literal["inst", "op"]
    name: str
    inputs: list[InputEffect]
    outputs: list[OutputEffect]
108
109
110
@dataclass
class InstDef(Node):
    """A complete `inst` / `op` definition: the header fields plus its block."""

    override: bool  # True when the `override` token was present
    register: bool  # True when the `register` token was present
    kind: Literal["inst", "op"]
    name: str
    inputs: list[InputEffect]
    outputs: list[OutputEffect]
    block: Block
119
120
121
@dataclass
class Macro(Node):
    """A `macro(NAME) = uop + uop + ...;` definition."""

    name: str
    uops: list[UOp]
125
126
127
@dataclass
class Family(Node):
    """A `family(NAME[, SIZE]) = { member, ... };` definition."""

    name: str
    size: str  # Variable giving the cache size in code units
    members: list[str]
132
133
@dataclass
class Pseudo(Node):
    """A `pseudo(NAME) = { target, ... };` definition."""

    name: str
    targets: list[str]  # opcodes this can be replaced by
137
138
139
class Parser(PLexer):
    """Recursive-descent parser for instruction, macro, family and pseudo definitions.

    Grammar methods return a node (or list) on success and None when the
    input does not match.  Methods decorated with `@contextual` additionally
    restore the token position when they return None and attach a `Context`
    to the node they return.  Input that is malformed after a construct has
    been recognised raises the exception built by `make_syntax_error`.

    The token helpers used here (`expect`, `require`, `peek`, `next`,
    `getpos`, `setpos`, `make_syntax_error`) are not defined in this class;
    presumably they come from `PLexer`.
    """

    @contextual
    def definition(self) -> InstDef | Macro | Pseudo | Family | None:
        """Parse one top-level definition, trying each kind in turn."""
        return (
            self.inst_def()
            or self.macro_def()
            or self.family_def()
            or self.pseudo_def()
        )

    @contextual
    def inst_def(self) -> InstDef | None:
        """Parse an instruction header followed by its block.

        Raises a syntax error if a header is found without a block.
        """
        hdr = self.inst_header()
        if not hdr:
            return None
        block = self.block()
        if not block:
            raise self.make_syntax_error("Expected block")
        return InstDef(
            hdr.override,
            hdr.register,
            hdr.kind,
            hdr.name,
            hdr.inputs,
            hdr.outputs,
            block,
        )

    @contextual
    def inst_header(self) -> InstHeader | None:
        """Parse an instruction header; the next token must be '{' (not consumed)."""
        # [override] inst(NAME)
        #   | [override] [register] inst(NAME, (inputs -- outputs))
        #   | [override] [register] op(NAME, (inputs -- outputs))
        # NOTE: the code below only matches the forms that have a comma and a
        # stack effect after NAME.
        # TODO: Make INST a keyword in the lexer.
        override = bool(self.expect(lx.OVERRIDE))
        register = bool(self.expect(lx.REGISTER))
        kind_tkn = self.expect(lx.IDENTIFIER)
        if not kind_tkn or kind_tkn.text not in ("inst", "op"):
            return None
        kind = kind_tkn.text
        if not self.expect(lx.LPAREN):
            return None
        name_tkn = self.expect(lx.IDENTIFIER)
        if not name_tkn:
            return None
        if not self.expect(lx.COMMA):
            return None
        inp, outp = self.io_effect()
        if not self.expect(lx.RPAREN):
            return None
        # Only peek: the opening brace belongs to the block that follows.
        following = self.peek()
        if not following or following.kind != lx.LBRACE:
            return None
        return InstHeader(override, register, kind, name_tkn.text, inp, outp)

    def io_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]:
        """Parse a parenthesised stack effect; raise a syntax error if absent."""
        # '(' [inputs] '--' [outputs] ')'
        if self.expect(lx.LPAREN):
            inputs = self.inputs() or []
            if self.expect(lx.MINUSMINUS):
                outputs = self.outputs() or []
                if self.expect(lx.RPAREN):
                    return inputs, outputs
        raise self.make_syntax_error("Expected stack effect")

    def inputs(self) -> list[InputEffect] | None:
        """Parse a comma-separated list of inputs; None if there is not even one."""
        # input (',' input)*
        here = self.getpos()
        first = self.input()
        if not first:
            self.setpos(here)
            return None
        near = self.getpos()
        if self.expect(lx.COMMA):
            if rest := self.inputs():
                return [first, *rest]
        # No further input: rewind to just after the first one, which also
        # un-consumes a comma that was not followed by an input.
        self.setpos(near)
        return [first]

    @contextual
    def input(self) -> InputEffect | None:
        """Parse a single input: a cache effect if possible, else a stack effect."""
        return self.cache_effect() or self.stack_effect()

    def outputs(self) -> list[OutputEffect] | None:
        """Parse a comma-separated list of outputs; None if there is not even one."""
        # output (, output)*
        here = self.getpos()
        first = self.output()
        if not first:
            self.setpos(here)
            return None
        near = self.getpos()
        if self.expect(lx.COMMA):
            if rest := self.outputs():
                return [first, *rest]
        # Same rewind rule as in inputs().
        self.setpos(near)
        return [first]

    @contextual
    def output(self) -> OutputEffect | None:
        """Parse a single output, which is always a stack effect."""
        return self.stack_effect()

    @contextual
    def cache_effect(self) -> CacheEffect | None:
        """Parse `IDENTIFIER '/' NUMBER`; the number must be a valid int literal."""
        # IDENTIFIER '/' NUMBER
        tkn = self.expect(lx.IDENTIFIER)
        if not tkn:
            return None
        if not self.expect(lx.DIVIDE):
            return None
        num = self.require(lx.NUMBER).text
        try:
            size = int(num)
        except ValueError:
            # The ValueError adds nothing to the syntax error; suppress it.
            raise self.make_syntax_error(f"Expected integer, got {num!r}") from None
        return CacheEffect(tkn.text, size)

    @contextual
    def stack_effect(self) -> StackEffect | None:
        """Parse a stack effect with an optional type, condition or array size."""
        # IDENTIFIER [':' IDENTIFIER] ['if' '(' expression ')']
        #   | IDENTIFIER '[' expression ']'
        tkn = self.expect(lx.IDENTIFIER)
        if not tkn:
            return None
        type_text = ""
        if self.expect(lx.COLON):
            type_text = self.require(lx.IDENTIFIER).text.strip()
        cond_text = ""
        if self.expect(lx.IF):
            self.require(lx.LPAREN)
            if not (cond := self.expression()):
                raise self.make_syntax_error("Expected condition")
            self.require(lx.RPAREN)
            cond_text = cond.text.strip()
        size_text = ""
        if self.expect(lx.LBRACKET):
            # The array form excludes an explicit type or condition.
            if type_text or cond_text:
                raise self.make_syntax_error("Unexpected [")
            if not (size := self.expression()):
                raise self.make_syntax_error("Expected expression")
            self.require(lx.RBRACKET)
            type_text = "PyObject **"
            size_text = size.text.strip()
        return StackEffect(tkn.text, type_text, cond_text, size_text)

    @contextual
    def expression(self) -> Expression | None:
        """Collect tokens up to the bracket/paren that closes the enclosing group.

        The caller has already consumed the opening delimiter, so nesting
        starts at level 1; the closing delimiter itself is left unconsumed.
        Returns None if no tokens were collected.
        """
        tokens: list[lx.Token] = []
        level = 1
        while tkn := self.peek():
            if tkn.kind in (lx.LBRACKET, lx.LPAREN):
                level += 1
            elif tkn.kind in (lx.RBRACKET, lx.RPAREN):
                level -= 1
                if level == 0:
                    break
            tokens.append(tkn)
            self.next()
        if not tokens:
            return None
        return Expression(lx.to_text(tokens).strip())

    @contextual
    def op(self) -> OpName | None:
        """Parse a single identifier as an op name."""
        if tkn := self.expect(lx.IDENTIFIER):
            return OpName(tkn.text)
        return None

    @contextual
    def macro_def(self) -> Macro | None:
        """Parse `macro(NAME) = uop + uop + ... ;`."""
        keyword = self.expect(lx.IDENTIFIER)
        if not keyword or keyword.text != "macro":
            return None
        if not self.expect(lx.LPAREN):
            return None
        name_tkn = self.expect(lx.IDENTIFIER)
        if not name_tkn:
            return None
        if not self.expect(lx.RPAREN):
            return None
        if not self.expect(lx.EQUALS):
            return None
        uops = self.uops()
        if not uops:
            return None
        self.require(lx.SEMI)
        return Macro(name_tkn.text, uops)

    def uops(self) -> list[UOp] | None:
        """Parse one or more uops separated by '+'; None if there is not even one."""
        first = self.uop()
        if not first:
            return None
        uops = [first]
        while self.expect(lx.PLUS):
            if uop := self.uop():
                uops.append(uop)
            else:
                raise self.make_syntax_error("Expected op name or cache effect")
        return uops

    @contextual
    def uop(self) -> UOp | None:
        """Parse `IDENTIFIER` (an op name) or `IDENTIFIER '/' NUMBER` (a cache effect)."""
        tkn = self.expect(lx.IDENTIFIER)
        if not tkn:
            return None
        if not self.expect(lx.DIVIDE):
            return OpName(tkn.text)
        num = self.expect(lx.NUMBER)
        if not num:
            raise self.make_syntax_error("Expected integer")
        try:
            size = int(num.text)
        except ValueError:
            # The ValueError adds nothing to the syntax error; suppress it.
            raise self.make_syntax_error(
                f"Expected integer, got {num.text!r}"
            ) from None
        return CacheEffect(tkn.text, size)

    @contextual
    def family_def(self) -> Family | None:
        """Parse `family(NAME[, SIZE]) = { member, ... };`."""
        keyword = self.expect(lx.IDENTIFIER)
        if not keyword or keyword.text != "family":
            return None
        if not self.expect(lx.LPAREN):
            return None
        name_tkn = self.expect(lx.IDENTIFIER)
        if not name_tkn:
            return None
        size = None
        if self.expect(lx.COMMA):
            if not (size := self.expect(lx.IDENTIFIER)):
                raise self.make_syntax_error("Expected identifier")
        if not self.expect(lx.RPAREN):
            return None
        if not self.expect(lx.EQUALS):
            return None
        if not self.expect(lx.LBRACE):
            raise self.make_syntax_error("Expected {")
        members = self.members()
        if not members:
            return None
        if self.expect(lx.RBRACE) and self.expect(lx.SEMI):
            return Family(name_tkn.text, size.text if size else "", members)
        return None

    @contextual
    def pseudo_def(self) -> Pseudo | None:
        """Parse `pseudo(NAME) = { target, ... };`."""
        keyword = self.expect(lx.IDENTIFIER)
        if not keyword or keyword.text != "pseudo":
            return None
        if not self.expect(lx.LPAREN):
            return None
        name_tkn = self.expect(lx.IDENTIFIER)
        if not name_tkn:
            return None
        if not self.expect(lx.RPAREN):
            return None
        if not self.expect(lx.EQUALS):
            return None
        if not self.expect(lx.LBRACE):
            raise self.make_syntax_error("Expected {")
        members = self.members()
        if not members:
            return None
        if self.expect(lx.RBRACE) and self.expect(lx.SEMI):
            return Pseudo(name_tkn.text, members)
        return None

    def members(self) -> list[str] | None:
        """Parse comma-separated identifiers that must be followed by '}'.

        A trailing comma before the closing brace is accepted.  Returns None
        (with the position restored) if the first token is not an identifier;
        raises a syntax error if the list is not followed by a right brace.
        """
        here = self.getpos()
        tkn = self.expect(lx.IDENTIFIER)
        if not tkn:
            self.setpos(here)
            return None
        members = [tkn.text]
        while self.expect(lx.COMMA):
            if tkn := self.expect(lx.IDENTIFIER):
                members.append(tkn.text)
            else:
                break
        # Only peek: the caller consumes the closing brace.
        peek = self.peek()
        if not peek or peek.kind != lx.RBRACE:
            raise self.make_syntax_error("Expected comma or right brace")
        return members

    @contextual
    def block(self) -> Block | None:
        """Consume a C blob and return a Block, or None if no tokens were read."""
        if self.c_blob():
            return Block()
        return None

    def c_blob(self) -> list[lx.Token]:
        """Consume raw tokens through the delimiter that balances the first opener.

        Tokens are read with `next(raw=True)`.  Reading stops after a closing
        brace/paren/bracket brings the nesting level to zero or below, or when
        the tokens run out.
        """
        tokens: list[lx.Token] = []
        level = 0
        while tkn := self.next(raw=True):
            tokens.append(tkn)
            if tkn.kind in (lx.LBRACE, lx.LPAREN, lx.LBRACKET):
                level += 1
            elif tkn.kind in (lx.RBRACE, lx.RPAREN, lx.RBRACKET):
                level -= 1
                if level <= 0:
                    break
        return tokens
401
402
403
if __name__ == "__main__":
    # Manual smoke test.  Usage:
    #   parser.py FILE      parse the text between the BEGIN/END BYTECODES markers
    #   parser.py -c TEXT   parse TEXT directly
    #   parser.py           parse a small built-in sample
    import sys

    args = sys.argv[1:]
    if not args:
        filename = "<default>"
        src = "if (x) { x.foo; // comment\n}"
    elif args[0] == "-c" and len(args) > 1:
        filename = "<string>"
        src = args[1]
    else:
        filename = args[0]
        with open(filename, "r") as f:
            srclines = f.read().splitlines()
        # Keep only the lines strictly between the two marker lines.
        begin = srclines.index("// BEGIN BYTECODES //")
        end = srclines.index("// END BYTECODES //")
        src = "\n".join(srclines[begin + 1 : end])
    parser = Parser(src, filename)
    x = parser.definition()
    print(x)
424
425