CoCalc -- clex.py

GitHub Repository: emscripten-core/emscripten
Path: blob/main/third_party/ply/example/ansic/clex.py
⁷⁰⁸⁷ views
1
# ----------------------------------------------------------------------
2
# clex.py
3
#
4
# A lexer for ANSI C.
5
# ----------------------------------------------------------------------
6

7
import sys
8
sys.path.insert(0,"../..")
9

10
import ply.lex as lex
11

12
# Reserved words: the ANSI C keywords, spelled in upper case because each
# one doubles as a token type name.
reserved = (
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
    'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
    'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
    'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
    )

# Complete list of token type names: the keywords followed by literals,
# operators and punctuation.
tokens = reserved + (
    # Literals (identifier, integer constant, float constant, string constant, char const)
    # NOTE(review): no rule in this module as visible here produces TYPEID.
    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'PLUSPLUS', 'MINUSMINUS',

    # Structure dereference (->)
    'ARROW',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
    )

# Completely ignored characters: space, tab and form feed.
t_ignore           = ' \t\x0c'
55

56
# Newlines
57
# Rule for runs of newlines. The docstring is the rule's regular expression,
# so it must stay the first statement of the function. Nothing is returned,
# which means no token is produced for the match.
def t_NEWLINE(t):
    r'\n+'
    # Advance the line counter by the number of newlines consumed.
    t.lexer.lineno += t.value.count("\n")
60
    
61
# Operators
#
# Several patterns here are prefixes of others ('<' of '<<' and '<=',
# '|' of '||', '&' of '&&', '!' of '!=').  PLY orders string rules by
# decreasing pattern length, so the longer operator is tried first and the
# declaration order below does not matter.
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MOD              = r'%'
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='
82

83
# Assignment operators

t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
# The caret must be escaped: a bare '^' is the start-of-string anchor, which
# would stop '^=' from matching anywhere except the very start of the input.
t_XOREQUAL         = r'\^='
96

97
# Increment/decrement
t_PLUSPLUS         = r'\+\+'
t_MINUSMINUS       = r'--'

# Structure dereference: ->
t_ARROW            = r'->'

# Conditional operator: ?
t_CONDOP           = r'\?'

# Delimiters
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
# Three literal dots; each one is escaped so it does not act as a wildcard.
t_ELLIPSIS         = r'\.\.\.'
119

120
# Identifiers and reserved words

# Maps the lowercase spelling of each reserved word to its upper-case token
# type name, e.g. 'while' -> 'WHILE'.
reserved_map = {name.lower(): name for name in reserved}
125

126
# Rule for identifiers and keywords. The docstring is the rule's regular
# expression, so it must stay the first statement of the function.
# Returns the token with its type set to the keyword's token name when the
# matched text is a key of reserved_map, and to "ID" otherwise.
def t_ID(t):
    r'[A-Za-z_][\w_]*'
    # The lookup uses the matched text unchanged, so only the exact lowercase
    # spelling of a reserved word is treated as a keyword.
    t.type = reserved_map.get(t.value, "ID")
    return t
130

131
# Integer literal: decimal digits with an optional u/l suffix in either case.
# The two-letter suffixes are listed first because regex alternation takes
# the first alternative that matches; with the single letters first, '10UL'
# would match only '10U' and leave the 'L' behind.
t_ICONST = r'\d+([uU][lL]|[lL][uU]|[uU]|[lL])?'

# Floating literal: digits '.' digits with an optional exponent, or digits
# with a mandatory exponent, then an optional l/L/f/F suffix.  The spaces
# around '|' are not part of the pattern because PLY compiles rules in
# verbose mode.  Only a lowercase 'e' exponent marker is accepted.
t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal: double-quoted, may contain backslash escapes, may not span
# a raw newline.
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
# NOTE(review): PLY tries function rules such as t_ID before string rules, so
# the L prefix is presumably consumed as an ID before this pattern is
# reached -- confirm.
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
142

143
# Comments
144
# Rule for /* ... */ comments. The docstring is the rule's regular
# expression; the non-greedy body stops at the first closing '*/'.
# Nothing is returned, so the comment produces no token.
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    # Comments may span lines; keep the line counter in step.
    t.lexer.lineno += t.value.count('\n')
147

148
# Preprocessor directive (ignored)
149
# Rule for preprocessor directives: everything from '#' through the end of
# the line is consumed and dropped (nothing is returned).  The docstring is
# the rule's regular expression; '#' is escaped because it would otherwise
# start a comment when the pattern is compiled in verbose mode.
def t_preprocessor(t):
    r'\#(.)*?\n'
    # The pattern consumes exactly one newline.
    t.lexer.lineno += 1
152
    
153
# Error handler: reports the first character of the remaining input that no
# rule matched, then skips that one character so lexing can continue.
def t_error(t):
    print("Illegal character %s" % repr(t.value[0]))
    t.lexer.skip(1)
156
    
157
# Build the lexer from the rules defined in this module, with PLY's
# optimized mode switched on.
lexer = lex.lex(optimize=1)

if __name__ == "__main__":
    # When run as a script, hand the lexer to PLY's built-in driver.
    lex.runmain(lexer)
160

161
    
162

163

164

165

166
Product

Resources

Company