Path: blob/master/modules/gdscript/gdscript_tokenizer.h
20843 views
/**************************************************************************/1/* gdscript_tokenizer.h */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */28/**************************************************************************/2930#pragma once3132#include "core/templates/hash_map.h"33#include "core/templates/list.h"34#include "core/templates/vector.h"35#include "core/variant/variant.h"3637class GDScriptTokenizer {38public:39enum CursorPlace {40CURSOR_NONE,41CURSOR_BEGINNING,42CURSOR_MIDDLE,43CURSOR_END,44};4546struct Token {47// If this enum changes, please increment the TOKENIZER_VERSION in gdscript_tokenizer_buffer.h48enum Type {49EMPTY,50// Basic51ANNOTATION,52IDENTIFIER,53LITERAL,54// Comparison55LESS,56LESS_EQUAL,57GREATER,58GREATER_EQUAL,59EQUAL_EQUAL,60BANG_EQUAL,61// Logical62AND,63OR,64NOT,65AMPERSAND_AMPERSAND,66PIPE_PIPE,67BANG,68// Bitwise69AMPERSAND,70PIPE,71TILDE,72CARET,73LESS_LESS,74GREATER_GREATER,75// Math76PLUS,77MINUS,78STAR,79STAR_STAR,80SLASH,81PERCENT,82// Assignment83EQUAL,84PLUS_EQUAL,85MINUS_EQUAL,86STAR_EQUAL,87STAR_STAR_EQUAL,88SLASH_EQUAL,89PERCENT_EQUAL,90LESS_LESS_EQUAL,91GREATER_GREATER_EQUAL,92AMPERSAND_EQUAL,93PIPE_EQUAL,94CARET_EQUAL,95// Control flow96IF,97ELIF,98ELSE,99FOR,100WHILE,101BREAK,102CONTINUE,103PASS,104RETURN,105MATCH,106WHEN,107// Keywords108AS,109ASSERT,110AWAIT,111BREAKPOINT,112CLASS,113CLASS_NAME,114TK_CONST, // Conflict with WinAPI.115ENUM,116EXTENDS,117FUNC,118TK_IN, // Conflict with WinAPI.119IS,120NAMESPACE,121PRELOAD,122SELF,123SIGNAL,124STATIC,125SUPER,126TRAIT,127VAR,128TK_VOID, // Conflict with WinAPI.129YIELD,130// Punctuation131BRACKET_OPEN,132BRACKET_CLOSE,133BRACE_OPEN,134BRACE_CLOSE,135PARENTHESIS_OPEN,136PARENTHESIS_CLOSE,137COMMA,138SEMICOLON,139PERIOD,140PERIOD_PERIOD,141PERIOD_PERIOD_PERIOD,142COLON,143DOLLAR,144FORWARD_ARROW,145UNDERSCORE,146// Whitespace147NEWLINE,148INDENT,149DEDENT,150// Constants151CONST_PI,152CONST_TAU,153CONST_INF,154CONST_NAN,155// Error message improvement156VCS_CONFLICT_MARKER,157BACKTICK,158QUESTION_MARK,159// Special160ERROR,161TK_EOF, // "EOF" is reserved162TK_MAX163};164165Type type = EMPTY;166Variant literal;167int start_line = 0;168int start_column = 0;169int end_line = 0;170int end_column = 0;171CursorPlace cursor_place = CURSOR_NONE;172String source;173174const char *get_name() const;175String get_debug_name() const;176bool can_precede_bin_op() const;177bool is_identifier() const;178bool is_node_name() const;179StringName get_identifier() const { return literal; }180181Token(Type p_type) {182type = p_type;183}184185Token() {}186};187188#ifdef TOOLS_ENABLED189struct CommentData {190String comment;191// true: Comment starts at beginning of line or after indentation.192// false: Inline comment (starts after some code).193bool new_line = false;194CommentData() {}195CommentData(const String &p_comment, bool p_new_line) {196comment = p_comment;197new_line = p_new_line;198}199};200virtual const HashMap<int, CommentData> &get_comments() const = 0;201#endif // TOOLS_ENABLED202203static String get_token_name(Token::Type p_token_type);204205#ifdef TOOLS_ENABLED206// This is a temporary solution, as Tokens are not able to store their position, only lines and columns.207virtual int get_current_position() const { return 0; }208virtual String get_source_code() const { return ""; }209#endif // TOOLS_ENABLED210211virtual int get_cursor_line() const = 0;212virtual int get_cursor_column() const = 0;213virtual void set_cursor_position(int p_line, int p_column) = 0;214virtual void set_multiline_mode(bool p_state) = 0;215virtual bool is_past_cursor() const = 0;216virtual void push_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.217virtual void pop_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.218virtual bool is_text() = 0;219220virtual Token scan() = 0;221222virtual ~GDScriptTokenizer() {}223};224225class GDScriptTokenizerText : public GDScriptTokenizer {226String source;227const char32_t *_source = nullptr;228const char32_t *_current = nullptr;229int line = 0;230int column = 0;231int cursor_line = -1;232int cursor_column = -1;233int tab_size = 4;234235// Keep track of multichar tokens.236const char32_t *_start = nullptr;237int start_line = 0;238int start_column = 0;239240// Info cache.241bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.242bool multiline_mode = false;243List<Token> error_stack;244bool pending_newline = false;245Token last_token;246Token last_newline;247int pending_indents = 0;248List<int> indent_stack;249List<List<int>> indent_stack_stack; // For lambdas, which require manipulating the indentation point.250List<char32_t> paren_stack;251char32_t indent_char = '\0';252int position = 0;253int length = 0;254Vector<int> continuation_lines;255#ifdef DEBUG_ENABLED256Vector<String> keyword_list;257#endif // DEBUG_ENABLED258259#ifdef TOOLS_ENABLED260HashMap<int, CommentData> comments;261#endif // TOOLS_ENABLED262263_FORCE_INLINE_ bool _is_at_end() { return position >= length; }264_FORCE_INLINE_ char32_t _peek(int p_offset = 0) { return position + p_offset >= 0 && position + p_offset < length ? _current[p_offset] : '\0'; }265int indent_level() const { return indent_stack.size(); }266bool has_error() const { return !error_stack.is_empty(); }267Token pop_error();268char32_t _advance();269String _get_indent_char_name(char32_t ch);270void _skip_whitespace();271void check_indent();272273#ifdef DEBUG_ENABLED274void make_keyword_list();275#endif // DEBUG_ENABLED276277Token make_error(const String &p_message);278void push_error(const String &p_message);279void push_error(const Token &p_error);280Token make_paren_error(char32_t p_paren);281Token make_token(Token::Type p_type);282Token make_literal(const Variant &p_literal);283Token make_identifier(const StringName &p_identifier);284Token check_vcs_marker(char32_t p_test, Token::Type p_double_type);285void push_paren(char32_t p_char);286bool pop_paren(char32_t p_expected);287288void newline(bool p_make_token);289Token number();290Token potential_identifier();291Token string();292Token annotation();293294public:295void set_source_code(const String &p_source_code);296297const Vector<int> &get_continuation_lines() const { return continuation_lines; }298299#ifdef TOOLS_ENABLED300virtual int get_current_position() const override { return position; }301virtual String get_source_code() const override { return source; }302#endif // TOOLS_ENABLED303304virtual int get_cursor_line() const override;305virtual int get_cursor_column() const override;306virtual void set_cursor_position(int p_line, int p_column) override;307virtual void set_multiline_mode(bool p_state) override;308virtual bool is_past_cursor() const override;309virtual void push_expression_indented_block() override; // For lambdas, or blocks inside expressions.310virtual void pop_expression_indented_block() override; // For lambdas, or blocks inside expressions.311virtual bool is_text() override { return true; }312313#ifdef TOOLS_ENABLED314virtual const HashMap<int, CommentData> &get_comments() const override {315return comments;316}317#endif // TOOLS_ENABLED318319virtual Token scan() override;320321GDScriptTokenizerText();322};323324325