/*
 * *****************************************************************************
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018-2025 Gavin D. Howard and contributors.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * *****************************************************************************
 *
 * The lexer for dc.
 *
 */

#if DC_ENABLED

#include <ctype.h>

#include <dc.h>
#include <vm.h>

/**
 * Tests the character at the lexer's current index against
 * BC_LEX_NUM_CHAR(c, false, false) and returns the negation of that test.
 * NOTE(review): presumably callers use this to decide whether a preceding
 * negative sign stands alone as a command instead of starting a number;
 * confirm against the callers.
 * @param l  The lexer.
 * @return   True if the current character is NOT accepted by
 *           BC_LEX_NUM_CHAR(c, false, false), false otherwise.
 */
bool
dc_lex_negCommand(BcLex* l)
{
	char c = l->buf[l->i];
	return !BC_LEX_NUM_CHAR(c, false, false);
}

/**
 * Processes a dc command that needs a register. This is where the
 * extended-register extension is implemented.
 *
 * The character examined is the one just before the lexer's current index
 * (dc_lex_token() has already advanced past it). On success, the lexer holds
 * either a name lexed by bc_lex_name() (extended registers) or a
 * one-character BC_LEX_NAME token.
 * @param l  The lexer.
 */
static void
dc_lex_register(BcLex* l)
{
	// If extended register is enabled and the character is whitespace...
	//
	// The <ctype.h> classifiers require an argument that is EOF or
	// representable as unsigned char. Plain char may be signed, so a byte
	// >= 0x80 from the input would otherwise be passed as a negative value,
	// which is undefined behavior. Hence the cast.
	if (DC_X && isspace((unsigned char) l->buf[l->i - 1]))
	{
		char c;

		// Eat the whitespace.
		bc_lex_whitespace(l);
		c = l->buf[l->i];

		// Check for a letter or underscore. The cast is needed for the same
		// reason as the one on isspace() above.
		if (BC_ERR(!isalpha((unsigned char) c) && c != '_'))
		{
			bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
		}

		// Parse a normal identifier.
		l->i += 1;
		bc_lex_name(l);
	}
	else
	{
		// I don't allow newlines because newlines are used for controlling when
		// execution happens, and allowing newlines would just be complex.
		if (BC_ERR(l->buf[l->i - 1] == '\n'))
		{
			bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]);
		}

		// Set the lexer string and token. The register name is the single
		// character itself, stored as a NUL-terminated string.
		bc_vec_popAll(&l->str);
		bc_vec_pushByte(&l->str, (uchar) l->buf[l->i - 1]);
		bc_vec_pushByte(&l->str, '\0');
		l->t = BC_LEX_NAME;
	}
}

/**
 * Parses a dc string. Since dc's strings need to check for balanced brackets,
 * we can't just parse bc and dc strings with different start and end
 * characters. Oh, and dc strings need to check for escaped brackets.
 *
 * Scanning starts at the lexer's current index, which is just past the opening
 * bracket when called from dc_lex_token(). On return, the lexer's string holds
 * the NUL-terminated contents without the outermost brackets, its index is the
 * position where scanning stopped, and its line count has been advanced by the
 * number of newlines seen.
 * @param l  The lexer.
 */
static void
dc_lex_string(BcLex* l)
{
	size_t depth, nls, i;
	char c;
	bool got_more;

	// Set the token and clear the string.
	l->t = BC_LEX_STR;
	bc_vec_popAll(&l->str);

	do
	{
		// Every pass rescans from l->i, so all per-scan state is reset here.
		depth = 1;
		nls = 0;
		got_more = false;

#if !BC_ENABLE_OSSFUZZ
		assert(l->mode != BC_MODE_STDIN || l->buf == vm->buffer.v);
#endif // !BC_ENABLE_OSSFUZZ

		// This is the meat. As long as we don't run into the NUL byte, and we
		// have "depth", which means we haven't completely balanced brackets
		// yet, we continue eating the string.
		for (i = l->i; (c = l->buf[i]) && depth; ++i)
		{
			// Check for escaped brackets and set the depths as appropriate.
			if (c == '\\')
			{
				// The character after the backslash is taken literally and
				// never changes the depth. A NUL there ends the scan with the
				// string still unbalanced.
				c = l->buf[++i];
				if (!c) break;
			}
			else
			{
				depth += (c == '[');
				depth -= (c == ']');
			}

			// We want to adjust the line in the lexer as necessary.
			nls += (c == '\n');

			// The bracket that brings depth to zero closes the string and is
			// not part of its contents.
			if (depth) bc_vec_push(&l->str, &c);
		}

		// The input ended before the brackets balanced. Try to read more
		// unless we are at EOF or lexing a file.
		if (BC_ERR(c == '\0' && depth))
		{
			if (!vm->eof && l->mode != BC_MODE_FILE)
			{
				got_more = bc_lex_readLine(l);
			}

			// Since the next pass rescans from l->i, throw away what was
			// collected so far to avoid duplicating it.
			if (got_more)
			{
				bc_vec_popAll(&l->str);
			}
		}
	}
	while (got_more && depth);

	// Obviously, if we didn't balance, that's an error.
	if (BC_ERR(c == '\0' && depth))
	{
		l->i = i;
		bc_lex_err(l, BC_ERR_PARSE_STRING);
	}

	bc_vec_pushByte(&l->str, '\0');

	l->i = i;
	l->line += nls;
}

/**
 * Lexes a dc token. This is the dc implementation of BcLexNext.
 *
 * Consumes one character from the lexer's buffer and then, in order: handles a
 * pending register if the previous token requires one, tries the
 * single-character token table, and finally falls back to the switch below for
 * everything more involved.
 * @param l  The lexer.
 */
void
dc_lex_token(BcLex* l)
{
	char c = l->buf[l->i++], c2;
	size_t i;

	BC_SIG_ASSERT_LOCKED;

	// If the last token was a command that needs a register, we need to parse a
	// register, so do so.
	for (i = 0; i < dc_lex_regs_len; ++i)
	{
		// If the token is a register token, take care of it and return.
		if (l->last == dc_lex_regs[i])
		{
			dc_lex_register(l);
			return;
		}
	}

	// These lines are for tokens that easily correspond to one character. We
	// just set the token. The table is indexed relative to '"', and the
	// assignment to l->t happens as part of the condition.
	if (c >= '"' && c <= '~' &&
	    (l->t = dc_lex_tokens[(c - '"')]) != BC_LEX_INVALID)
	{
		return;
	}

	// This is the workhorse of the lexer when more complicated things are
	// needed.
	switch (c)
	{
		case '\0':
		case '\n':
		case '\t':
		case '\v':
		case '\f':
		case '\r':
		case ' ':
		{
			bc_lex_commonTokens(l, c);
			break;
		}

		// We don't have the ! command, so we always expect certain things
		// after the exclamation point.
		case '!':
		{
			c2 = l->buf[l->i];

			if (c2 == '=') l->t = BC_LEX_OP_REL_NE;
			else if (c2 == '<') l->t = BC_LEX_OP_REL_LE;
			else if (c2 == '>') l->t = BC_LEX_OP_REL_GE;
			else bc_lex_invalidChar(l, c);

			// Consume the second character of the operator.
			l->i += 1;

			break;
		}

		case '#':
		{
			bc_lex_lineComment(l);
			break;
		}

		case '.':
		{
			c2 = l->buf[l->i];

			// If the character after is a number, this dot is part of a number.
			// Otherwise, a lone dot is reported as an invalid character.
			if (BC_NO_ERR(BC_LEX_NUM_CHAR(c2, true, false)))
			{
				bc_lex_number(l, c);
			}
			else bc_lex_invalidChar(l, c);

			break;
		}

		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
		case 'A':
		case 'B':
		case 'C':
		case 'D':
		case 'E':
		case 'F':
		{
			bc_lex_number(l, c);
			break;
		}

		// 'g' is a prefix for two-character commands; the second character
		// selects which one.
		case 'g':
		{
			c2 = l->buf[l->i];

			if (c2 == 'l') l->t = BC_LEX_KW_LINE_LENGTH;
			else if (c2 == 'x') l->t = BC_LEX_EXTENDED_REGISTERS;
			else if (c2 == 'z') l->t = BC_LEX_KW_LEADING_ZERO;
			// NOTE(review): this reports the second character, while the '!'
			// case above reports the first one; confirm that is intended.
			else bc_lex_invalidChar(l, c2);

			// Consume the second character of the command.
			l->i += 1;

			break;
		}

		case '[':
		{
			dc_lex_string(l);
			break;
		}

		default:
		{
			bc_lex_invalidChar(l, c);
		}
	}
}
#endif // DC_ENABLED