Path: blob/main/sys/contrib/openzfs/module/lua/llex.c
48383 views
// SPDX-License-Identifier: MIT1/*2** $Id: llex.c,v 2.63.1.3 2015/02/09 17:56:34 roberto Exp $3** Lexical Analyzer4** See Copyright Notice in lua.h5*/67#define llex_c8#define LUA_CORE910#include <sys/lua/lua.h>1112#include "lctype.h"13#include "ldo.h"14#include "llex.h"15#include "lobject.h"16#include "lparser.h"17#include "lstate.h"18#include "lstring.h"19#include "ltable.h"20#include "lzio.h"21222324#define next(ls) (ls->current = zgetc(ls->z))25262728#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')293031/* ORDER RESERVED */32static const char *const luaX_tokens [] = {33"and", "break", "do", "else", "elseif",34"end", "false", "for", "function", "goto", "if",35"in", "local", "nil", "not", "or", "repeat",36"return", "then", "true", "until", "while",37"..", "...", "==", ">=", "<=", "~=", "::", "<eof>",38"<number>", "<name>", "<string>"39};404142#define save_and_next(ls) (save(ls, ls->current), next(ls))434445static l_noret lexerror (LexState *ls, const char *msg, int token);464748static void save (LexState *ls, int c) {49Mbuffer *b = ls->buff;50if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {51size_t newsize;52if (luaZ_sizebuffer(b) >= MAX_SIZET/2)53lexerror(ls, "lexical element too long", 0);54newsize = luaZ_sizebuffer(b) * 2;55luaZ_resizebuffer(ls->L, b, newsize);56}57b->buffer[luaZ_bufflen(b)++] = cast(char, c);58}596061void luaX_init (lua_State *L) {62int i;63for (i=0; i<NUM_RESERVED; i++) {64TString *ts = luaS_new(L, luaX_tokens[i]);65luaS_fix(ts); /* reserved words are never collected */66ts->tsv.extra = cast_byte(i+1); /* reserved word */67}68}697071const char *luaX_token2str (LexState *ls, int token) {72if (token < FIRST_RESERVED) { /* single-byte symbols? */73lua_assert(token == cast(unsigned char, token));74return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) :75luaO_pushfstring(ls->L, "char(%d)", token);76}77else {78const char *s = luaX_tokens[token - FIRST_RESERVED];79if (token < TK_EOS) /* fixed format (symbols and reserved words)? */80return luaO_pushfstring(ls->L, LUA_QS, s);81else /* names, strings, and numerals */82return s;83}84}858687static const char *txtToken (LexState *ls, int token) {88switch (token) {89case TK_NAME:90case TK_STRING:91case TK_NUMBER:92save(ls, '\0');93return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));94default:95return luaX_token2str(ls, token);96}97}9899100static l_noret lexerror (LexState *ls, const char *msg, int token) {101char buff[LUA_IDSIZE];102luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE);103msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);104if (token)105luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));106luaD_throw(ls->L, LUA_ERRSYNTAX);107}108109110l_noret luaX_syntaxerror (LexState *ls, const char *msg) {111lexerror(ls, msg, ls->t.token);112}113114115/*116** creates a new string and anchors it in function's table so that117** it will not be collected until the end of the function's compilation118** (by that time it should be anchored in function's prototype)119*/120TString *luaX_newstring (LexState *ls, const char *str, size_t l) {121lua_State *L = ls->L;122TValue *o; /* entry for `str' */123TString *ts = luaS_newlstr(L, str, l); /* create new string */124setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */125o = luaH_set(L, ls->fs->h, L->top - 1);126if (ttisnil(o)) { /* not in use yet? (see 'addK') */127/* boolean value does not need GC barrier;128table has no metatable, so it does not need to invalidate cache */129setbvalue(o, 1); /* t[string] = true */130luaC_checkGC(L);131}132else { /* string already present */133ts = rawtsvalue(keyfromval(o)); /* re-use value previously stored */134}135L->top--; /* remove string from stack */136return ts;137}138139140/*141** increment line number and skips newline sequence (any of142** \n, \r, \n\r, or \r\n)143*/144static void inclinenumber (LexState *ls) {145int old = ls->current;146lua_assert(currIsNewline(ls));147next(ls); /* skip `\n' or `\r' */148if (currIsNewline(ls) && ls->current != old)149next(ls); /* skip `\n\r' or `\r\n' */150if (++ls->linenumber >= MAX_INT)151lexerror(ls, "chunk has too many lines", 0);152}153154155void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,156int firstchar) {157ls->decpoint = '.';158ls->L = L;159ls->current = firstchar;160ls->lookahead.token = TK_EOS; /* no look-ahead token */161ls->z = z;162ls->fs = NULL;163ls->linenumber = 1;164ls->lastline = 1;165ls->source = source;166ls->envn = luaS_new(L, LUA_ENV); /* create env name */167luaS_fix(ls->envn); /* never collect this name */168luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */169}170171172173/*174** =======================================================175** LEXICAL ANALYZER176** =======================================================177*/178179180181static int check_next (LexState *ls, const char *set) {182if (ls->current == '\0' || !strchr(set, ls->current))183return 0;184save_and_next(ls);185return 1;186}187188189/*190** change all characters 'from' in buffer to 'to'191*/192static void buffreplace (LexState *ls, char from, char to) {193size_t n = luaZ_bufflen(ls->buff);194char *p = luaZ_buffer(ls->buff);195while (n--)196if (p[n] == from) p[n] = to;197}198199200#if !defined(getlocaledecpoint)201#define getlocaledecpoint() (localeconv()->decimal_point[0])202#endif203204205#define buff2d(b,e) luaO_str2d(luaZ_buffer(b), luaZ_bufflen(b) - 1, e)206207/*208** in case of format error, try to change decimal point separator to209** the one defined in the current locale and check again210*/211static void trydecpoint (LexState *ls, SemInfo *seminfo) {212char old = ls->decpoint;213ls->decpoint = getlocaledecpoint();214buffreplace(ls, old, ls->decpoint); /* try new decimal separator */215if (!buff2d(ls->buff, &seminfo->r)) {216/* format error with correct decimal point: no more options */217buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */218lexerror(ls, "malformed number", TK_NUMBER);219}220}221222223/* LUA_NUMBER */224/*225** this function is quite liberal in what it accepts, as 'luaO_str2d'226** will reject ill-formed numerals.227*/228static void read_numeral (LexState *ls, SemInfo *seminfo) {229const char *expo = "Ee";230int first = ls->current;231lua_assert(lisdigit(ls->current));232save_and_next(ls);233if (first == '0' && check_next(ls, "Xx")) /* hexadecimal? */234expo = "Pp";235for (;;) {236if (check_next(ls, expo)) /* exponent part? */237(void) check_next(ls, "+-"); /* optional exponent sign */238if (lisxdigit(ls->current) || ls->current == '.')239save_and_next(ls);240else break;241}242save(ls, '\0');243buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */244if (!buff2d(ls->buff, &seminfo->r)) /* format error? */245trydecpoint(ls, seminfo); /* try to update decimal point separator */246}247248249/*250** skip a sequence '[=*[' or ']=*]' and return its number of '='s or251** -1 if sequence is malformed252*/253static int skip_sep (LexState *ls) {254int count = 0;255int s = ls->current;256lua_assert(s == '[' || s == ']');257save_and_next(ls);258while (ls->current == '=') {259save_and_next(ls);260count++;261}262return (ls->current == s) ? count : (-count) - 1;263}264265266static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {267save_and_next(ls); /* skip 2nd `[' */268if (currIsNewline(ls)) /* string starts with a newline? */269inclinenumber(ls); /* skip it */270for (;;) {271switch (ls->current) {272case EOZ:273lexerror(ls, (seminfo) ? "unfinished long string" :274"unfinished long comment", TK_EOS);275break; /* to avoid warnings */276case ']': {277if (skip_sep(ls) == sep) {278save_and_next(ls); /* skip 2nd `]' */279goto endloop;280}281break;282}283case '\n': case '\r': {284save(ls, '\n');285inclinenumber(ls);286if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */287break;288}289default: {290if (seminfo) save_and_next(ls);291else next(ls);292}293}294} endloop:295if (seminfo)296seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),297luaZ_bufflen(ls->buff) - 2*(2 + sep));298}299300301static void escerror (LexState *ls, int *c, int n, const char *msg) {302int i;303luaZ_resetbuffer(ls->buff); /* prepare error message */304save(ls, '\\');305for (i = 0; i < n && c[i] != EOZ; i++)306save(ls, c[i]);307lexerror(ls, msg, TK_STRING);308}309310311static int readhexaesc (LexState *ls) {312int c[3], i; /* keep input for error message */313int r = 0; /* result accumulator */314c[0] = 'x'; /* for error message */315for (i = 1; i < 3; i++) { /* read two hexadecimal digits */316c[i] = next(ls);317if (!lisxdigit(c[i]))318escerror(ls, c, i + 1, "hexadecimal digit expected");319r = (r << 4) + luaO_hexavalue(c[i]);320}321return r;322}323324325static int readdecesc (LexState *ls) {326int c[3], i;327int r = 0; /* result accumulator */328for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */329c[i] = ls->current;330r = 10*r + c[i] - '0';331next(ls);332}333if (r > UCHAR_MAX)334escerror(ls, c, i, "decimal escape too large");335return r;336}337338339static void read_string (LexState *ls, int del, SemInfo *seminfo) {340save_and_next(ls); /* keep delimiter (for error messages) */341while (ls->current != del) {342switch (ls->current) {343case EOZ:344lexerror(ls, "unfinished string", TK_EOS);345break; /* to avoid warnings */346case '\n':347case '\r':348lexerror(ls, "unfinished string", TK_STRING);349break; /* to avoid warnings */350case '\\': { /* escape sequences */351int c; /* final character to be saved */352next(ls); /* do not save the `\' */353switch (ls->current) {354case 'a': c = '\a'; goto read_save;355case 'b': c = '\b'; goto read_save;356case 'f': c = '\f'; goto read_save;357case 'n': c = '\n'; goto read_save;358case 'r': c = '\r'; goto read_save;359case 't': c = '\t'; goto read_save;360case 'v': c = '\v'; goto read_save;361case 'x': c = readhexaesc(ls); goto read_save;362case '\n': case '\r':363inclinenumber(ls); c = '\n'; goto only_save;364case '\\': case '\"': case '\'':365c = ls->current; goto read_save;366case EOZ: goto no_save; /* will raise an error next loop */367case 'z': { /* zap following span of spaces */368next(ls); /* skip the 'z' */369while (lisspace(ls->current)) {370if (currIsNewline(ls)) inclinenumber(ls);371else next(ls);372}373goto no_save;374}375default: {376if (!lisdigit(ls->current))377escerror(ls, &ls->current, 1, "invalid escape sequence");378/* digital escape \ddd */379c = readdecesc(ls);380goto only_save;381}382}383read_save: next(ls); /* read next character */384only_save: save(ls, c); /* save 'c' */385no_save: break;386}387default:388save_and_next(ls);389}390}391save_and_next(ls); /* skip delimiter */392seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,393luaZ_bufflen(ls->buff) - 2);394}395396397static int llex (LexState *ls, SemInfo *seminfo) {398luaZ_resetbuffer(ls->buff);399for (;;) {400switch (ls->current) {401case '\n': case '\r': { /* line breaks */402inclinenumber(ls);403break;404}405case ' ': case '\f': case '\t': case '\v': { /* spaces */406next(ls);407break;408}409case '-': { /* '-' or '--' (comment) */410next(ls);411if (ls->current != '-') return '-';412/* else is a comment */413next(ls);414if (ls->current == '[') { /* long comment? */415int sep = skip_sep(ls);416luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */417if (sep >= 0) {418read_long_string(ls, NULL, sep); /* skip long comment */419luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */420break;421}422}423/* else short comment */424while (!currIsNewline(ls) && ls->current != EOZ)425next(ls); /* skip until end of line (or end of file) */426break;427}428case '[': { /* long string or simply '[' */429int sep = skip_sep(ls);430if (sep >= 0) {431read_long_string(ls, seminfo, sep);432return TK_STRING;433} else if (sep == -1) {434return '[';435} else {436lexerror(ls, "invalid long string delimiter", TK_STRING);437break;438}439}440case '=': {441next(ls);442if (ls->current != '=') return '=';443else { next(ls); return TK_EQ; }444}445case '<': {446next(ls);447if (ls->current != '=') return '<';448else { next(ls); return TK_LE; }449}450case '>': {451next(ls);452if (ls->current != '=') return '>';453else { next(ls); return TK_GE; }454}455case '~': {456next(ls);457if (ls->current != '=') return '~';458else { next(ls); return TK_NE; }459}460case ':': {461next(ls);462if (ls->current != ':') return ':';463else { next(ls); return TK_DBCOLON; }464}465case '"': case '\'': { /* short literal strings */466read_string(ls, ls->current, seminfo);467return TK_STRING;468}469case '.': { /* '.', '..', '...', or number */470save_and_next(ls);471if (check_next(ls, ".")) {472if (check_next(ls, "."))473return TK_DOTS; /* '...' */474else return TK_CONCAT; /* '..' */475}476else if (!lisdigit(ls->current)) return '.';477/* else go through */478}479zfs_fallthrough;480case '0': case '1': case '2': case '3': case '4':481case '5': case '6': case '7': case '8': case '9': {482read_numeral(ls, seminfo);483return TK_NUMBER;484}485case EOZ: {486return TK_EOS;487}488default: {489if (lislalpha(ls->current)) { /* identifier or reserved word? */490TString *ts;491do {492save_and_next(ls);493} while (lislalnum(ls->current));494ts = luaX_newstring(ls, luaZ_buffer(ls->buff),495luaZ_bufflen(ls->buff));496seminfo->ts = ts;497if (isreserved(ts)) /* reserved word? */498return ts->tsv.extra - 1 + FIRST_RESERVED;499else {500return TK_NAME;501}502}503else { /* single-char tokens (+ - / ...) */504int c = ls->current;505next(ls);506return c;507}508}509}510}511}512513514void luaX_next (LexState *ls) {515ls->lastline = ls->linenumber;516if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */517ls->t = ls->lookahead; /* use this one */518ls->lookahead.token = TK_EOS; /* and discharge it */519}520else521ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */522}523524525int luaX_lookahead (LexState *ls) {526lua_assert(ls->lookahead.token == TK_EOS);527ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);528return ls->lookahead.token;529}530531532