Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/third_party/ply/example/ansic/clex.py
7087 views
1
# ----------------------------------------------------------------------
2
# clex.py
3
#
4
# A lexer for ANSI C.
5
# ----------------------------------------------------------------------
6
7
import sys
8
sys.path.insert(0,"../..")
9
10
import ply.lex as lex
11
12
# Reserved words of ANSI C.  Each entry is the upper-case token name of the
# keyword; the lower-case spelling is what appears in C source text.
reserved = (
    'AUTO',
    'BREAK',
    'CASE',
    'CHAR',
    'CONST',
    'CONTINUE',
    'DEFAULT',
    'DO',
    'DOUBLE',
    'ELSE',
    'ENUM',
    'EXTERN',
    'FLOAT',
    'FOR',
    'GOTO',
    'IF',
    'INT',
    'LONG',
    'REGISTER',
    'RETURN',
    'SHORT',
    'SIGNED',
    'SIZEOF',
    'STATIC',
    'STRUCT',
    'SWITCH',
    'TYPEDEF',
    'UNION',
    'UNSIGNED',
    'VOID',
    'VOLATILE',
    'WHILE',
)
19
20
# Complete list of token names: the reserved words followed by every other
# token category, in the same order as before.
tokens = (
    reserved
    # Literals (identifier, integer constant, float constant, string
    # constant, char constant)
    + ('ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST')
    # Operators (+, -, *, /, %, |, &, ~, ^, <<, >>, ||, &&, !, <, <=, >, >=,
    # ==, !=)
    + ('PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD')
    + ('OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT')
    + ('LOR', 'LAND', 'LNOT')
    + ('LT', 'LE', 'GT', 'GE', 'EQ', 'NE')
    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    + ('EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL',
       'MINUSEQUAL')
    + ('LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL')
    # Increment/decrement (++, --)
    + ('PLUSPLUS', 'MINUSMINUS')
    # Structure dereference (->)
    + ('ARROW',)
    # Conditional operator (?)
    + ('CONDOP',)
    # Delimiters ( ) [ ] { } , . ; :
    + ('LPAREN', 'RPAREN')
    + ('LBRACKET', 'RBRACKET')
    + ('LBRACE', 'RBRACE')
    + ('COMMA', 'PERIOD', 'SEMI', 'COLON')
    # Ellipsis (...)
    + ('ELLIPSIS',)
)
52
53
# Characters skipped outright between tokens: space, horizontal tab and
# form feed (\x0c).  Newlines are not listed here; they have their own rule.
t_ignore = ' \t\x0c'
55
56
# Newlines: a run of '\n' characters advances the lexer's line counter.
# The function returns nothing, so no NEWLINE token is handed back.
def t_NEWLINE(t):
    r'\n+'
    # The raw string above is the rule's regular expression, not prose
    # documentation -- it must stay exactly as written.
    newline_count = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + newline_count
60
61
# Operators.  Every t_<NAME> string below is the regular expression that
# recognises token <NAME>; regex metacharacters are backslash-escaped.

# Arithmetic: + - * / %
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MOD = r'%'

# Bitwise: | & ~ ^ << >>
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'

# Logical: || && !
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'

# Relational and equality: < > <= >= == !=
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
82
83
# Assignment operators: = *= /= %= += -= <<= >>= &= |= ^=
#
# '*', '+', '|' and '^' are regex metacharacters and must be escaped.  In
# particular an unescaped '^' is a start-of-input anchor, so writing XOREQUAL
# as r'^=' would never match in the middle of the input and "a ^= b" would
# be split into XOR followed by EQUALS.
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'\^='
96
97
# Increment / decrement: ++ --
t_PLUSPLUS = r'\+\+'
t_MINUSMINUS = r'--'

# Structure dereference: ->
t_ARROW = r'->'

# Conditional operator: ?
t_CONDOP = r'\?'

# Delimiters: ( ) [ ] { } , . ; : and the three-dot ellipsis
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
119
120
# Identifiers and reserved words

# Lookup table from a keyword's lower-case source spelling to its upper-case
# token name, built from the `reserved` tuple.
reserved_map = {keyword.lower(): keyword for keyword in reserved}
125
126
def t_ID(t):
    r'[A-Za-z_][\w_]*'
    # The raw string above is the rule's regular expression and must not be
    # edited as if it were documentation.
    #
    # A lexeme that appears in reserved_map is re-typed as that keyword's
    # token; anything else stays a plain identifier.  Nothing in this rule
    # ever assigns the TYPEID token type.
    token_type = reserved_map.get(t.value, "ID")
    t.type = token_type
    return t
130
131
# Integer literal: decimal digits with an optional u/U, l/L, ul or lu suffix.
#
# Regex alternation picks the first alternative that matches, not the
# longest one, so the two-letter suffixes must be listed before the
# one-letter ones.  Otherwise "10UL" is cut short at "10U" and the trailing
# "L" is lexed as a separate identifier.
t_ICONST = r'\d+([uU][lL]|[lL][uU]|[uU]|[lL])?'
133
134
# Floating literal, following the C floating-constant grammar:
#   fractional form   digits '.' [digits]  or  '.' digits, optional exponent
#   exponent form     digits followed by a mandatory exponent
#   exponent          e or E, optional sign, digits
#   suffix            optional l, L, f or F
# Compared with the earlier pattern this also accepts an upper-case 'E'
# exponent ("1.5E10") and the forms "1." and ".5".  A bare digit sequence
# such as "10" is deliberately not matched, so it is left for ICONST.
# The pattern contains no literal spaces, so it does not depend on the
# regex being compiled in verbose mode.
t_FCONST = r'((\d+\.\d*|\.\d+)([eE][-+]?\d+)?|\d+[eE][-+]?\d+)[lLfF]?'
136
137
# String literal: a double-quoted run of ordinary characters (anything but a
# backslash or newline) and backslash escapes, matched non-greedily up to
# the first unescaped closing quote.
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant: the same body as a string literal but in single
# quotes, optionally prefixed with L for a wide character ('c' or L'c').
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
142
143
# Comments: a /* ... */ block, possibly spanning several lines.  Nothing is
# returned, so no token is produced; only the line counter is advanced by
# the number of newlines inside the comment.
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    # The raw string above is the rule's regular expression.
    comment_text = t.value
    t.lexer.lineno += comment_text.count('\n')
147
148
# Preprocessor directive (ignored).
#
# The rule consumes everything from '#' to the end of the directive:
#   * a backslash followed by any character -- including a newline -- is
#     taken as part of the directive, so backslash-continued lines are
#     skipped as a whole instead of being lexed as ordinary code;
#   * the terminating newline is optional, so a directive on the very last
#     line of the input (no trailing newline) is still recognised.
# The line counter is advanced by the number of newlines actually consumed
# rather than by a fixed 1, which keeps it correct in both cases above.
# Nothing is returned, so no token is produced.
def t_preprocessor(t):
    r'\#([^\\\n]|\\(.|\n))*\n?'
    # The raw string above is the rule's regular expression; '#' is escaped
    # because an unescaped '#' starts a comment in a verbose-mode regex.
    t.lexer.lineno += t.value.count('\n')
152
153
def t_error(t):
    # Error rule: report the first character of the unmatched text, then
    # advance the lexer by exactly one character so scanning can resume.
    offending_char = t.value[0]
    print("Illegal character %s" % repr(offending_char))
    t.lexer.skip(1)
156
157
# Build the lexer object.  This call is kept as a plain module-level
# statement placed after all rule definitions.
# NOTE(review): optimize=1 presumably switches PLY to its optimized mode with
# a cached table module; if such a generated table already exists it may need
# to be regenerated after any rule change -- confirm against the PLY docs.
lexer = lex.lex(optimize=1)

# When the file is executed as a script, hand the lexer to PLY's runmain
# helper.
if __name__ == "__main__":
    lex.runmain(lexer)
160
161
162
163
164
165
166