Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/modules/gdscript/gdscript_tokenizer.h
20843 views
1
/**************************************************************************/
2
/* gdscript_tokenizer.h */
3
/**************************************************************************/
4
/* This file is part of: */
5
/* GODOT ENGINE */
6
/* https://godotengine.org */
7
/**************************************************************************/
8
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10
/* */
11
/* Permission is hereby granted, free of charge, to any person obtaining */
12
/* a copy of this software and associated documentation files (the */
13
/* "Software"), to deal in the Software without restriction, including */
14
/* without limitation the rights to use, copy, modify, merge, publish, */
15
/* distribute, sublicense, and/or sell copies of the Software, and to */
16
/* permit persons to whom the Software is furnished to do so, subject to */
17
/* the following conditions: */
18
/* */
19
/* The above copyright notice and this permission notice shall be */
20
/* included in all copies or substantial portions of the Software. */
21
/* */
22
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29
/**************************************************************************/
30
31
#pragma once
32
33
#include "core/templates/hash_map.h"
34
#include "core/templates/list.h"
35
#include "core/templates/vector.h"
36
#include "core/variant/variant.h"
37
38
class GDScriptTokenizer {
39
public:
40
enum CursorPlace {
41
CURSOR_NONE,
42
CURSOR_BEGINNING,
43
CURSOR_MIDDLE,
44
CURSOR_END,
45
};
46
47
struct Token {
48
// If this enum changes, please increment the TOKENIZER_VERSION in gdscript_tokenizer_buffer.h
49
enum Type {
50
EMPTY,
51
// Basic
52
ANNOTATION,
53
IDENTIFIER,
54
LITERAL,
55
// Comparison
56
LESS,
57
LESS_EQUAL,
58
GREATER,
59
GREATER_EQUAL,
60
EQUAL_EQUAL,
61
BANG_EQUAL,
62
// Logical
63
AND,
64
OR,
65
NOT,
66
AMPERSAND_AMPERSAND,
67
PIPE_PIPE,
68
BANG,
69
// Bitwise
70
AMPERSAND,
71
PIPE,
72
TILDE,
73
CARET,
74
LESS_LESS,
75
GREATER_GREATER,
76
// Math
77
PLUS,
78
MINUS,
79
STAR,
80
STAR_STAR,
81
SLASH,
82
PERCENT,
83
// Assignment
84
EQUAL,
85
PLUS_EQUAL,
86
MINUS_EQUAL,
87
STAR_EQUAL,
88
STAR_STAR_EQUAL,
89
SLASH_EQUAL,
90
PERCENT_EQUAL,
91
LESS_LESS_EQUAL,
92
GREATER_GREATER_EQUAL,
93
AMPERSAND_EQUAL,
94
PIPE_EQUAL,
95
CARET_EQUAL,
96
// Control flow
97
IF,
98
ELIF,
99
ELSE,
100
FOR,
101
WHILE,
102
BREAK,
103
CONTINUE,
104
PASS,
105
RETURN,
106
MATCH,
107
WHEN,
108
// Keywords
109
AS,
110
ASSERT,
111
AWAIT,
112
BREAKPOINT,
113
CLASS,
114
CLASS_NAME,
115
TK_CONST, // Conflict with WinAPI.
116
ENUM,
117
EXTENDS,
118
FUNC,
119
TK_IN, // Conflict with WinAPI.
120
IS,
121
NAMESPACE,
122
PRELOAD,
123
SELF,
124
SIGNAL,
125
STATIC,
126
SUPER,
127
TRAIT,
128
VAR,
129
TK_VOID, // Conflict with WinAPI.
130
YIELD,
131
// Punctuation
132
BRACKET_OPEN,
133
BRACKET_CLOSE,
134
BRACE_OPEN,
135
BRACE_CLOSE,
136
PARENTHESIS_OPEN,
137
PARENTHESIS_CLOSE,
138
COMMA,
139
SEMICOLON,
140
PERIOD,
141
PERIOD_PERIOD,
142
PERIOD_PERIOD_PERIOD,
143
COLON,
144
DOLLAR,
145
FORWARD_ARROW,
146
UNDERSCORE,
147
// Whitespace
148
NEWLINE,
149
INDENT,
150
DEDENT,
151
// Constants
152
CONST_PI,
153
CONST_TAU,
154
CONST_INF,
155
CONST_NAN,
156
// Error message improvement
157
VCS_CONFLICT_MARKER,
158
BACKTICK,
159
QUESTION_MARK,
160
// Special
161
ERROR,
162
TK_EOF, // "EOF" is reserved
163
TK_MAX
164
};
165
166
Type type = EMPTY;
167
Variant literal;
168
int start_line = 0;
169
int start_column = 0;
170
int end_line = 0;
171
int end_column = 0;
172
CursorPlace cursor_place = CURSOR_NONE;
173
String source;
174
175
const char *get_name() const;
176
String get_debug_name() const;
177
bool can_precede_bin_op() const;
178
bool is_identifier() const;
179
bool is_node_name() const;
180
StringName get_identifier() const { return literal; }
181
182
Token(Type p_type) {
183
type = p_type;
184
}
185
186
Token() {}
187
};
188
189
#ifdef TOOLS_ENABLED
190
struct CommentData {
191
String comment;
192
// true: Comment starts at beginning of line or after indentation.
193
// false: Inline comment (starts after some code).
194
bool new_line = false;
195
CommentData() {}
196
CommentData(const String &p_comment, bool p_new_line) {
197
comment = p_comment;
198
new_line = p_new_line;
199
}
200
};
201
virtual const HashMap<int, CommentData> &get_comments() const = 0;
202
#endif // TOOLS_ENABLED
203
204
static String get_token_name(Token::Type p_token_type);
205
206
#ifdef TOOLS_ENABLED
207
// This is a temporary solution, as Tokens are not able to store their position, only lines and columns.
208
virtual int get_current_position() const { return 0; }
209
virtual String get_source_code() const { return ""; }
210
#endif // TOOLS_ENABLED
211
212
virtual int get_cursor_line() const = 0;
213
virtual int get_cursor_column() const = 0;
214
virtual void set_cursor_position(int p_line, int p_column) = 0;
215
virtual void set_multiline_mode(bool p_state) = 0;
216
virtual bool is_past_cursor() const = 0;
217
virtual void push_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.
218
virtual void pop_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.
219
virtual bool is_text() = 0;
220
221
virtual Token scan() = 0;
222
223
virtual ~GDScriptTokenizer() {}
224
};
225
226
class GDScriptTokenizerText : public GDScriptTokenizer {
227
String source;
228
const char32_t *_source = nullptr;
229
const char32_t *_current = nullptr;
230
int line = 0;
231
int column = 0;
232
int cursor_line = -1;
233
int cursor_column = -1;
234
int tab_size = 4;
235
236
// Keep track of multichar tokens.
237
const char32_t *_start = nullptr;
238
int start_line = 0;
239
int start_column = 0;
240
241
// Info cache.
242
bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.
243
bool multiline_mode = false;
244
List<Token> error_stack;
245
bool pending_newline = false;
246
Token last_token;
247
Token last_newline;
248
int pending_indents = 0;
249
List<int> indent_stack;
250
List<List<int>> indent_stack_stack; // For lambdas, which require manipulating the indentation point.
251
List<char32_t> paren_stack;
252
char32_t indent_char = '\0';
253
int position = 0;
254
int length = 0;
255
Vector<int> continuation_lines;
256
#ifdef DEBUG_ENABLED
257
Vector<String> keyword_list;
258
#endif // DEBUG_ENABLED
259
260
#ifdef TOOLS_ENABLED
261
HashMap<int, CommentData> comments;
262
#endif // TOOLS_ENABLED
263
264
_FORCE_INLINE_ bool _is_at_end() { return position >= length; }
265
_FORCE_INLINE_ char32_t _peek(int p_offset = 0) { return position + p_offset >= 0 && position + p_offset < length ? _current[p_offset] : '\0'; }
266
int indent_level() const { return indent_stack.size(); }
267
bool has_error() const { return !error_stack.is_empty(); }
268
Token pop_error();
269
char32_t _advance();
270
String _get_indent_char_name(char32_t ch);
271
void _skip_whitespace();
272
void check_indent();
273
274
#ifdef DEBUG_ENABLED
275
void make_keyword_list();
276
#endif // DEBUG_ENABLED
277
278
Token make_error(const String &p_message);
279
void push_error(const String &p_message);
280
void push_error(const Token &p_error);
281
Token make_paren_error(char32_t p_paren);
282
Token make_token(Token::Type p_type);
283
Token make_literal(const Variant &p_literal);
284
Token make_identifier(const StringName &p_identifier);
285
Token check_vcs_marker(char32_t p_test, Token::Type p_double_type);
286
void push_paren(char32_t p_char);
287
bool pop_paren(char32_t p_expected);
288
289
void newline(bool p_make_token);
290
Token number();
291
Token potential_identifier();
292
Token string();
293
Token annotation();
294
295
public:
296
void set_source_code(const String &p_source_code);
297
298
const Vector<int> &get_continuation_lines() const { return continuation_lines; }
299
300
#ifdef TOOLS_ENABLED
301
virtual int get_current_position() const override { return position; }
302
virtual String get_source_code() const override { return source; }
303
#endif // TOOLS_ENABLED
304
305
virtual int get_cursor_line() const override;
306
virtual int get_cursor_column() const override;
307
virtual void set_cursor_position(int p_line, int p_column) override;
308
virtual void set_multiline_mode(bool p_state) override;
309
virtual bool is_past_cursor() const override;
310
virtual void push_expression_indented_block() override; // For lambdas, or blocks inside expressions.
311
virtual void pop_expression_indented_block() override; // For lambdas, or blocks inside expressions.
312
virtual bool is_text() override { return true; }
313
314
#ifdef TOOLS_ENABLED
315
virtual const HashMap<int, CommentData> &get_comments() const override {
316
return comments;
317
}
318
#endif // TOOLS_ENABLED
319
320
virtual Token scan() override;
321
322
GDScriptTokenizerText();
323
};
324
325