Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/Ast/include/Luau/Lexer.h
2727 views
1
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
2
#pragma once
3
4
#include "Luau/Allocator.h"
5
#include "Luau/Ast.h"
6
#include "Luau/Location.h"
7
#include "Luau/DenseHash.h"
8
#include "Luau/Common.h"
9
10
#include <vector>
11
12
namespace Luau
13
{
14
15
struct Lexeme
16
{
17
enum Type
18
{
19
Eof = 0,
20
21
// 1..255 means actual character values
22
Char_END = 256,
23
24
Equal,
25
LessEqual,
26
GreaterEqual,
27
NotEqual,
28
Dot2,
29
Dot3,
30
SkinnyArrow,
31
DoubleColon,
32
FloorDiv,
33
34
InterpStringBegin,
35
InterpStringMid,
36
InterpStringEnd,
37
// An interpolated string with no expressions (like `x`)
38
InterpStringSimple,
39
40
AddAssign,
41
SubAssign,
42
MulAssign,
43
DivAssign,
44
FloorDivAssign,
45
ModAssign,
46
PowAssign,
47
ConcatAssign,
48
49
RawString,
50
QuotedString,
51
Number,
52
Name,
53
54
Comment,
55
BlockComment,
56
57
Attribute,
58
AttributeOpen,
59
60
BrokenString,
61
BrokenComment,
62
BrokenUnicode,
63
BrokenInterpDoubleBrace,
64
Error,
65
66
Reserved_BEGIN,
67
ReservedAnd = Reserved_BEGIN,
68
ReservedBreak,
69
ReservedDo,
70
ReservedElse,
71
ReservedElseif,
72
ReservedEnd,
73
ReservedFalse,
74
ReservedFor,
75
ReservedFunction,
76
ReservedIf,
77
ReservedIn,
78
ReservedLocal,
79
ReservedNil,
80
ReservedNot,
81
ReservedOr,
82
ReservedRepeat,
83
ReservedReturn,
84
ReservedThen,
85
ReservedTrue,
86
ReservedUntil,
87
ReservedWhile,
88
Reserved_END
89
};
90
91
enum struct QuoteStyle
92
{
93
Single,
94
Double,
95
};
96
97
Type type;
98
Location location;
99
100
// Field declared here, before the union, to ensure that Lexeme size is 32 bytes.
101
private:
102
// length is used to extract a slice from the input buffer.
103
// This field is only valid for certain lexeme types which don't duplicate portions of input
104
// but instead store a pointer to a location in the input buffer and the length of lexeme.
105
unsigned int length;
106
107
public:
108
union
109
{
110
const char* data; // String, Number, Comment
111
const char* name; // Name
112
unsigned int codepoint; // BrokenUnicode
113
};
114
115
Lexeme(const Location& location, Type type);
116
Lexeme(const Location& location, char character);
117
Lexeme(const Location& location, Type type, const char* data, size_t size);
118
Lexeme(const Location& location, Type type, const char* name);
119
120
unsigned int getLength() const;
121
unsigned int getBlockDepth() const;
122
QuoteStyle getQuoteStyle() const;
123
124
std::string toString() const;
125
};
126
127
// Compile-time guard: the build fails if Lexeme ever grows beyond 32 bytes.
static_assert(sizeof(Lexeme) <= 32, "Size of `Lexeme` struct should be up to 32 bytes.");
128
129
class AstNameTable
130
{
131
public:
132
AstNameTable(Allocator& allocator);
133
134
AstName addStatic(const char* name, Lexeme::Type type = Lexeme::Name);
135
136
std::pair<AstName, Lexeme::Type> getOrAddWithType(const char* name, size_t length);
137
std::pair<AstName, Lexeme::Type> getWithType(const char* name, size_t length) const;
138
139
AstName getOrAdd(const char* name, size_t len);
140
AstName getOrAdd(const char* name);
141
AstName get(const char* name) const;
142
143
private:
144
struct Entry
145
{
146
AstName value;
147
uint32_t length;
148
Lexeme::Type type;
149
150
bool operator==(const Entry& other) const;
151
};
152
153
struct EntryHash
154
{
155
size_t operator()(const Entry& e) const;
156
};
157
158
DenseHashSet<Entry, EntryHash> data;
159
160
Allocator& allocator;
161
};
162
163
class Lexer
164
{
165
public:
166
Lexer(const char* buffer, std::size_t bufferSize, AstNameTable& names, Position startPosition = {0, 0});
167
168
void setSkipComments(bool skip);
169
void setReadNames(bool read);
170
171
const Location& previousLocation() const
172
{
173
return prevLocation;
174
}
175
176
const Lexeme& next();
177
const Lexeme& next(bool skipComments, bool updatePrevLocation);
178
void nextline();
179
180
Lexeme lookahead();
181
182
const Lexeme& current() const
183
{
184
return lexeme;
185
}
186
187
static bool isReserved(const std::string& word);
188
189
static bool fixupQuotedString(std::string& data);
190
static void fixupMultilineString(std::string& data);
191
192
unsigned int getOffset() const
193
{
194
return offset;
195
}
196
197
enum class BraceType
198
{
199
InterpolatedString,
200
Normal
201
};
202
203
std::optional<Lexer::BraceType> peekBraceStackTop();
204
205
private:
206
char peekch() const;
207
char peekch(unsigned int lookahead) const;
208
209
Position position() const;
210
211
// consume() assumes current character is not a newline for performance; when that is not known, consumeAny() should be used instead.
212
void consume();
213
void consumeAny();
214
215
Lexeme readCommentBody();
216
217
// Given a sequence [===[ or ]===], returns:
218
// 1. number of equal signs (or 0 if none present) between the brackets
219
// 2. -1 if this is not a long comment/string separator
220
// 3. -N if this is a malformed separator
221
// Does *not* consume the closing brace.
222
int skipLongSeparator();
223
224
Lexeme readLongString(const Position& start, int sep, Lexeme::Type ok, Lexeme::Type broken);
225
Lexeme readQuotedString();
226
227
Lexeme readInterpolatedStringBegin();
228
Lexeme readInterpolatedStringSection(Position start, Lexeme::Type formatType, Lexeme::Type endType);
229
230
void readBackslashInString();
231
232
std::pair<AstName, Lexeme::Type> readName();
233
234
Lexeme readNumber(const Position& start, unsigned int startOffset);
235
236
Lexeme readUtf8Error();
237
Lexeme readNext();
238
239
const char* buffer;
240
std::size_t bufferSize;
241
242
unsigned int offset;
243
244
unsigned int line;
245
unsigned int lineOffset;
246
247
Lexeme lexeme;
248
249
Location prevLocation;
250
251
AstNameTable& names;
252
253
bool skipComments;
254
bool readNames;
255
256
std::vector<BraceType> braceStack;
257
};
258
259
// Returns true when `ch` is one of the six ASCII whitespace characters:
// space, horizontal tab, carriage return, line feed, vertical tab, or form feed.
// Checked by direct comparison, so the result does not depend on the C locale.
inline bool isSpace(char ch)
{
    return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' || ch == '\v' || ch == '\f';
}
263
264
} // namespace Luau
265
266