#ifndef Py_TOKENIZER_H1#define Py_TOKENIZER_H2#ifdef __cplusplus3extern "C" {4#endif56#include "object.h"78/* Tokenizer interface */910#include "pycore_token.h" /* For token types */1112#define MAXINDENT 100 /* Max indentation level */13#define MAXLEVEL 200 /* Max parentheses level */14#define MAXFSTRINGLEVEL 150 /* Max f-string nesting level */1516enum decoding_state {17STATE_INIT,18STATE_SEEK_CODING,19STATE_NORMAL20};2122enum interactive_underflow_t {23/* Normal mode of operation: return a new token when asked in interactive mode */24IUNDERFLOW_NORMAL,25/* Forcefully return ENDMARKER when asked for a new token in interactive mode. This26* can be used to prevent the tokenizer to prompt the user for new tokens */27IUNDERFLOW_STOP,28};2930struct token {31int level;32int lineno, col_offset, end_lineno, end_col_offset;33const char *start, *end;34PyObject *metadata;35};3637enum tokenizer_mode_kind_t {38TOK_REGULAR_MODE,39TOK_FSTRING_MODE,40};4142#define MAX_EXPR_NESTING 34344typedef struct _tokenizer_mode {45enum tokenizer_mode_kind_t kind;4647int curly_bracket_depth;48int curly_bracket_expr_start_depth;4950char f_string_quote;51int f_string_quote_size;52int f_string_raw;53const char* f_string_start;54const char* f_string_multi_line_start;55int f_string_line_start;5657Py_ssize_t f_string_start_offset;58Py_ssize_t f_string_multi_line_start_offset;5960Py_ssize_t last_expr_size;61Py_ssize_t last_expr_end;62char* last_expr_buffer;63int f_string_debug;64} tokenizer_mode;6566/* Tokenizer state */67struct tok_state {68/* Input state; buf <= cur <= inp <= end */69/* NB an entire line is held in the buffer */70char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL or readline != NULL */71char *cur; /* Next character in buffer */72char *inp; /* End of data in buffer */73int fp_interactive; /* If the file descriptor is interactive */74char *interactive_src_start; /* The start of the source parsed so far in interactive mode */75char *interactive_src_end; /* The end of the source parsed so far in interactive mode */76const char *end; /* End of input buffer if buf != NULL */77const char *start; /* Start of current token if not NULL */78int done; /* E_OK normally, E_EOF at EOF, otherwise error code */79/* NB If done != E_OK, cur must be == inp!!! */80FILE *fp; /* Rest of input; NULL if tokenizing a string */81int tabsize; /* Tab spacing */82int indent; /* Current indentation index */83int indstack[MAXINDENT]; /* Stack of indents */84int atbol; /* Nonzero if at begin of new line */85int pendin; /* Pending indents (if > 0) or dedents (if < 0) */86const char *prompt, *nextprompt; /* For interactive prompting */87int lineno; /* Current line number */88int first_lineno; /* First line of a single line or multi line string89expression (cf. issue 16806) */90int starting_col_offset; /* The column offset at the beginning of a token */91int col_offset; /* Current col offset */92int level; /* () [] {} Parentheses nesting level */93/* Used to allow free continuations inside them */94char parenstack[MAXLEVEL];95int parenlinenostack[MAXLEVEL];96int parencolstack[MAXLEVEL];97PyObject *filename;98/* Stuff for checking on different tab sizes */99int altindstack[MAXINDENT]; /* Stack of alternate indents */100/* Stuff for PEP 0263 */101enum decoding_state decoding_state;102int decoding_erred; /* whether erred in decoding */103char *encoding; /* Source encoding. */104int cont_line; /* whether we are in a continuation line. */105const char* line_start; /* pointer to start of current line */106const char* multi_line_start; /* pointer to start of first line of107a single line or multi line string108expression (cf. issue 16806) */109PyObject *decoding_readline; /* open(...).readline */110PyObject *decoding_buffer;111PyObject *readline; /* readline() function */112const char* enc; /* Encoding for the current str. */113char* str; /* Source string being tokenized (if tokenizing from a string)*/114char* input; /* Tokenizer's newline translated copy of the string. */115116int type_comments; /* Whether to look for type comments */117118/* async/await related fields (still needed depending on feature_version) */119int async_hacks; /* =1 if async/await aren't always keywords */120int async_def; /* =1 if tokens are inside an 'async def' body. */121int async_def_indent; /* Indentation level of the outermost 'async def'. */122int async_def_nl; /* =1 if the outermost 'async def' had at least one123NEWLINE token after it. */124/* How to proceed when asked for a new token in interactive mode */125enum interactive_underflow_t interactive_underflow;126int report_warnings;127// TODO: Factor this into its own thing128tokenizer_mode tok_mode_stack[MAXFSTRINGLEVEL];129int tok_mode_stack_index;130int tok_report_warnings;131int tok_extra_tokens;132int comment_newline;133int implicit_newline;134#ifdef Py_DEBUG135int debug;136#endif137};138139extern struct tok_state *_PyTokenizer_FromString(const char *, int, int);140extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int, int);141extern struct tok_state *_PyTokenizer_FromReadline(PyObject*, const char*, int, int);142extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*,143const char *, const char *);144extern void _PyTokenizer_Free(struct tok_state *);145extern void _PyToken_Free(struct token *);146extern void _PyToken_Init(struct token *);147extern int _PyTokenizer_Get(struct tok_state *, struct token *);148149#define tok_dump _Py_tok_dump150151#ifdef __cplusplus152}153#endif154#endif /* !Py_TOKENIZER_H */155156157