/*1* *****************************************************************************2*3* SPDX-License-Identifier: BSD-2-Clause4*5* Copyright (c) 2018-2025 Gavin D. Howard and contributors.6*7* Redistribution and use in source and binary forms, with or without8* modification, are permitted provided that the following conditions are met:9*10* * Redistributions of source code must retain the above copyright notice, this11* list of conditions and the following disclaimer.12*13* * Redistributions in binary form must reproduce the above copyright notice,14* this list of conditions and the following disclaimer in the documentation15* and/or other materials provided with the distribution.16*17* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"18* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE19* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE20* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE21* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR22* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF23* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS24* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN25* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)26* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE27* POSSIBILITY OF SUCH DAMAGE.28*29* *****************************************************************************30*31* Definitions for bc's lexer.32*33*/3435#ifndef BC_LEX_H36#define BC_LEX_H3738#include <stdbool.h>39#include <stddef.h>4041#include <status.h>42#include <vector.h>43#include <lang.h>4445/**46* A convenience macro for throwing errors in lex code. This takes care of47* plumbing like passing in the current line the lexer is on.48* @param l The lexer.49* @param e The error.50*/51#if BC_DEBUG52#define bc_lex_err(l, e) (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line))53#else // BC_DEBUG54#define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line))55#endif // BC_DEBUG5657/**58* A convenience macro for throwing errors in lex code. This takes care of59* plumbing like passing in the current line the lexer is on.60* @param l The lexer.61* @param e The error.62*/63#if BC_DEBUG64#define bc_lex_verr(l, e, ...) \65(bc_vm_handleError((e), __FILE__, __LINE__, (l)->line, __VA_ARGS__))66#else // BC_DEBUG67#define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__))68#endif // BC_DEBUG6970// BC_LEX_NEG_CHAR returns the char that corresponds to negative for the71// current calculator.72//73// BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid74// char for numbers. In bc and dc, capital letters are part of numbers, to a75// point. (dc only goes up to hex, so its last valid char is 'F'.)76#if BC_ENABLED7778#if DC_ENABLED79#define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_')80#define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F')81#else // DC_ENABLED82#define BC_LEX_NEG_CHAR ('-')83#define BC_LEX_LAST_NUM_CHAR ('Z')84#endif // DC_ENABLED8586#else // BC_ENABLED8788#define BC_LEX_NEG_CHAR ('_')89#define BC_LEX_LAST_NUM_CHAR ('F')9091#endif // BC_ENABLED9293/**94* Returns true if c is a valid number character.95* @param c The char to check.96* @param pt If a decimal point has already been seen.97* @param int_only True if the number is expected to be an int only, false if98* non-integers are allowed.99* @return True if @a c is a valid number character.100*/101#define BC_LEX_NUM_CHAR(c, pt, int_only) \102(isdigit(c) != 0 || ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) || \103((c) == '.' && !(pt) && !(int_only)))104105/// An enum of lex token types.106typedef enum BcLexType107{108/// End of file.109BC_LEX_EOF,110111/// Marker for invalid tokens, used by bc and dc for const data.112BC_LEX_INVALID,113114#if BC_ENABLED115116/// Increment operator.117BC_LEX_OP_INC,118119/// Decrement operator.120BC_LEX_OP_DEC,121122#endif // BC_ENABLED123124/// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer125/// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be126/// able to distinguish them.127BC_LEX_NEG,128129/// Boolean not.130BC_LEX_OP_BOOL_NOT,131132#if BC_ENABLE_EXTRA_MATH133134/// Truncation operator.135BC_LEX_OP_TRUNC,136137#endif // BC_ENABLE_EXTRA_MATH138139/// Power operator.140BC_LEX_OP_POWER,141142/// Multiplication operator.143BC_LEX_OP_MULTIPLY,144145/// Division operator.146BC_LEX_OP_DIVIDE,147148/// Modulus operator.149BC_LEX_OP_MODULUS,150151/// Addition operator.152BC_LEX_OP_PLUS,153154/// Subtraction operator.155BC_LEX_OP_MINUS,156157#if BC_ENABLE_EXTRA_MATH158159/// Places (truncate or extend) operator.160BC_LEX_OP_PLACES,161162/// Left (decimal) shift operator.163BC_LEX_OP_LSHIFT,164165/// Right (decimal) shift operator.166BC_LEX_OP_RSHIFT,167168#endif // BC_ENABLE_EXTRA_MATH169170/// Equal operator.171BC_LEX_OP_REL_EQ,172173/// Less than or equal operator.174BC_LEX_OP_REL_LE,175176/// Greater than or equal operator.177BC_LEX_OP_REL_GE,178179/// Not equal operator.180BC_LEX_OP_REL_NE,181182/// Less than operator.183BC_LEX_OP_REL_LT,184185/// Greater than operator.186BC_LEX_OP_REL_GT,187188/// Boolean or operator.189BC_LEX_OP_BOOL_OR,190191/// Boolean and operator.192BC_LEX_OP_BOOL_AND,193194#if BC_ENABLED195196/// Power assignment operator.197BC_LEX_OP_ASSIGN_POWER,198199/// Multiplication assignment operator.200BC_LEX_OP_ASSIGN_MULTIPLY,201202/// Division assignment operator.203BC_LEX_OP_ASSIGN_DIVIDE,204205/// Modulus assignment operator.206BC_LEX_OP_ASSIGN_MODULUS,207208/// Addition assignment operator.209BC_LEX_OP_ASSIGN_PLUS,210211/// Subtraction assignment operator.212BC_LEX_OP_ASSIGN_MINUS,213214#if BC_ENABLE_EXTRA_MATH215216/// Places (truncate or extend) assignment operator.217BC_LEX_OP_ASSIGN_PLACES,218219/// Left (decimal) shift assignment operator.220BC_LEX_OP_ASSIGN_LSHIFT,221222/// Right (decimal) shift assignment operator.223BC_LEX_OP_ASSIGN_RSHIFT,224225#endif // BC_ENABLE_EXTRA_MATH226#endif // BC_ENABLED227228/// Assignment operator.229BC_LEX_OP_ASSIGN,230231/// Newline.232BC_LEX_NLINE,233234/// Whitespace.235BC_LEX_WHITESPACE,236237/// Left parenthesis.238BC_LEX_LPAREN,239240/// Right parenthesis.241BC_LEX_RPAREN,242243/// Left bracket.244BC_LEX_LBRACKET,245246/// Comma.247BC_LEX_COMMA,248249/// Right bracket.250BC_LEX_RBRACKET,251252/// Left brace.253BC_LEX_LBRACE,254255/// Semicolon.256BC_LEX_SCOLON,257258/// Right brace.259BC_LEX_RBRACE,260261/// String.262BC_LEX_STR,263264/// Identifier/name.265BC_LEX_NAME,266267/// Constant number.268BC_LEX_NUMBER,269270// These keywords are in the order they are in for a reason. Don't change271// the order unless you want a bunch of weird failures in the test suite.272// In fact, almost all of these tokens are in a specific order for a reason.273274#if BC_ENABLED275276/// bc auto keyword.277BC_LEX_KW_AUTO,278279/// bc break keyword.280BC_LEX_KW_BREAK,281282/// bc continue keyword.283BC_LEX_KW_CONTINUE,284285/// bc define keyword.286BC_LEX_KW_DEFINE,287288/// bc for keyword.289BC_LEX_KW_FOR,290291/// bc if keyword.292BC_LEX_KW_IF,293294/// bc limits keyword.295BC_LEX_KW_LIMITS,296297/// bc return keyword.298BC_LEX_KW_RETURN,299300/// bc while keyword.301BC_LEX_KW_WHILE,302303/// bc halt keyword.304BC_LEX_KW_HALT,305306/// bc last keyword.307BC_LEX_KW_LAST,308309#endif // BC_ENABLED310311/// bc ibase keyword.312BC_LEX_KW_IBASE,313314/// bc obase keyword.315BC_LEX_KW_OBASE,316317/// bc scale keyword.318BC_LEX_KW_SCALE,319320#if BC_ENABLE_EXTRA_MATH321322/// bc seed keyword.323BC_LEX_KW_SEED,324325#endif // BC_ENABLE_EXTRA_MATH326327/// bc length keyword.328BC_LEX_KW_LENGTH,329330/// bc print keyword.331BC_LEX_KW_PRINT,332333/// bc sqrt keyword.334BC_LEX_KW_SQRT,335336/// bc abs keyword.337BC_LEX_KW_ABS,338339/// bc is_number keyword.340BC_LEX_KW_IS_NUMBER,341342/// bc is_string keyword.343BC_LEX_KW_IS_STRING,344345#if BC_ENABLE_EXTRA_MATH346347/// bc irand keyword.348BC_LEX_KW_IRAND,349350#endif // BC_ENABLE_EXTRA_MATH351352/// bc asciffy keyword.353BC_LEX_KW_ASCIIFY,354355/// bc modexp keyword.356BC_LEX_KW_MODEXP,357358/// bc divmod keyword.359BC_LEX_KW_DIVMOD,360361/// bc quit keyword.362BC_LEX_KW_QUIT,363364/// bc read keyword.365BC_LEX_KW_READ,366367#if BC_ENABLE_EXTRA_MATH368369/// bc rand keyword.370BC_LEX_KW_RAND,371372#endif // BC_ENABLE_EXTRA_MATH373374/// bc maxibase keyword.375BC_LEX_KW_MAXIBASE,376377/// bc maxobase keyword.378BC_LEX_KW_MAXOBASE,379380/// bc maxscale keyword.381BC_LEX_KW_MAXSCALE,382383#if BC_ENABLE_EXTRA_MATH384385/// bc maxrand keyword.386BC_LEX_KW_MAXRAND,387388#endif // BC_ENABLE_EXTRA_MATH389390/// bc line_length keyword.391BC_LEX_KW_LINE_LENGTH,392393#if BC_ENABLED394395/// bc global_stacks keyword.396BC_LEX_KW_GLOBAL_STACKS,397398#endif // BC_ENABLED399400/// bc leading_zero keyword.401BC_LEX_KW_LEADING_ZERO,402403/// bc stream keyword.404BC_LEX_KW_STREAM,405406/// bc else keyword.407BC_LEX_KW_ELSE,408409#if DC_ENABLED410411/// dc extended registers keyword.412BC_LEX_EXTENDED_REGISTERS,413414/// A special token for dc to calculate equal without a register.415BC_LEX_EQ_NO_REG,416417/// Colon (array) operator.418BC_LEX_COLON,419420/// Execute command.421BC_LEX_EXECUTE,422423/// Print stack command.424BC_LEX_PRINT_STACK,425426/// Clear stack command.427BC_LEX_CLEAR_STACK,428429/// Register stack level command.430BC_LEX_REG_STACK_LEVEL,431432/// Main stack level command.433BC_LEX_STACK_LEVEL,434435/// Duplicate command.436BC_LEX_DUPLICATE,437438/// Swap (reverse) command.439BC_LEX_SWAP,440441/// Pop (remove) command.442BC_LEX_POP,443444/// Store ibase command.445BC_LEX_STORE_IBASE,446447/// Store obase command.448BC_LEX_STORE_OBASE,449450/// Store scale command.451BC_LEX_STORE_SCALE,452453#if BC_ENABLE_EXTRA_MATH454455/// Store seed command.456BC_LEX_STORE_SEED,457458#endif // BC_ENABLE_EXTRA_MATH459460/// Load variable onto stack command.461BC_LEX_LOAD,462463/// Pop off of variable stack onto results stack command.464BC_LEX_LOAD_POP,465466/// Push onto variable stack command.467BC_LEX_STORE_PUSH,468469/// Print with pop command.470BC_LEX_PRINT_POP,471472/// Parameterized quit command.473BC_LEX_NQUIT,474475/// Execution stack depth command.476BC_LEX_EXEC_STACK_LENGTH,477478/// Scale of number command. This is needed specifically for dc because bc479/// parses the scale function in parts.480BC_LEX_SCALE_FACTOR,481482/// Array length command. This is needed specifically for dc because bc483/// just reuses its length keyword.484BC_LEX_ARRAY_LENGTH,485486#endif // DC_ENABLED487488} BcLexType;489490struct BcLex;491492/**493* A function pointer to call when another token is needed. Mostly called by the494* parser.495* @param l The lexer.496*/497typedef void (*BcLexNext)(struct BcLex* l);498499/// The lexer.500typedef struct BcLex501{502/// A pointer to the text to lex.503const char* buf;504505/// The current index into buf.506size_t i;507508/// The current line.509size_t line;510511/// The length of buf.512size_t len;513514/// The current token.515BcLexType t;516517/// The previous token.518BcLexType last;519520/// A string to store extra data for tokens. For example, the @a BC_LEX_STR521/// token really needs to store the actual string, and numbers also need the522/// string.523BcVec str;524525/// The mode the lexer is in.526BcMode mode;527528} BcLex;529530/**531* Initializes a lexer.532* @param l The lexer to initialize.533*/534void535bc_lex_init(BcLex* l);536537/**538* Frees a lexer. This is not guarded by #if BC_DEBUG because a separate539* parser is created at runtime to parse read() expressions and dc strings, and540* that parser needs a lexer.541* @param l The lexer to free.542*/543void544bc_lex_free(BcLex* l);545546/**547* Sets the filename that the lexer will be lexing.548* @param l The lexer.549* @param file The filename that the lexer will lex.550*/551void552bc_lex_file(BcLex* l, const char* file);553554/**555* Sets the text the lexer will lex.556* @param l The lexer.557* @param text The text to lex.558* @param mode The mode to lex in.559*/560void561bc_lex_text(BcLex* l, const char* text, BcMode mode);562563/**564* Generic next function for the parser to call. It takes care of calling the565* correct @a BcLexNext function and consuming whitespace.566* @param l The lexer.567*/568void569bc_lex_next(BcLex* l);570571/**572* Lexes a line comment (one beginning with '#' and going to a newline).573* @param l The lexer.574*/575void576bc_lex_lineComment(BcLex* l);577578/**579* Lexes a general comment (C-style comment).580* @param l The lexer.581*/582void583bc_lex_comment(BcLex* l);584585/**586* Lexes whitespace, finding as much as possible.587* @param l The lexer.588*/589void590bc_lex_whitespace(BcLex* l);591592/**593* Lexes a number that begins with char @a start. This takes care of parsing594* numbers in scientific and engineering notations.595* @param l The lexer.596* @param start The starting char of the number. To detect a number and call597* this function, the lexer had to eat the first char. It fixes598* that by passing it in.599*/600void601bc_lex_number(BcLex* l, char start);602603/**604* Lexes a name/identifier.605* @param l The lexer.606*/607void608bc_lex_name(BcLex* l);609610/**611* Lexes common whitespace characters.612* @param l The lexer.613* @param c The character to lex.614*/615void616bc_lex_commonTokens(BcLex* l, char c);617618/**619* Throws a parse error because char @a c was invalid.620* @param l The lexer.621* @param c The problem character.622*/623void624bc_lex_invalidChar(BcLex* l, char c);625626/**627* Reads a line from stdin and puts it into the lexer's buffer.628* @param l The lexer.629*/630bool631bc_lex_readLine(BcLex* l);632633#endif // BC_LEX_H634635636