CoCalc -- lex.h

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/bc/include/lex.h
39481 views
1
/*
2
 * *****************************************************************************
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 *
6
 * Copyright (c) 2018-2025 Gavin D. Howard and contributors.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions are met:
10
 *
11
 * * Redistributions of source code must retain the above copyright notice, this
12
 *   list of conditions and the following disclaimer.
13
 *
14
 * * Redistributions in binary form must reproduce the above copyright notice,
15
 *   this list of conditions and the following disclaimer in the documentation
16
 *   and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
 * POSSIBILITY OF SUCH DAMAGE.
29
 *
30
 * *****************************************************************************
31
 *
32
 * Definitions for bc's lexer.
33
 *
34
 */
35

36
#ifndef BC_LEX_H
37
#define BC_LEX_H
38

39
#include <stdbool.h>
40
#include <stddef.h>
41

42
#include <status.h>
43
#include <vector.h>
44
#include <lang.h>
45

46
/**
 * A convenience macro for throwing errors in lex code. This takes care of
 * plumbing like passing in the current line the lexer is on.
 *
 * When BC_DEBUG is nonzero, the expansion also passes __FILE__ and __LINE__ of
 * the call site to bc_vm_handleError(), ahead of the lexer's line.
 * @param l  The lexer. Must be a pointer, since the macro reads @a l->line.
 * @param e  The error.
 */
#if BC_DEBUG
#define bc_lex_err(l, e) (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line))
#else // BC_DEBUG
#define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line))
#endif // BC_DEBUG
57

58
/**
 * A convenience macro for throwing errors in lex code when the error needs
 * extra arguments. Like bc_lex_err(), this takes care of plumbing like passing
 * in the current line the lexer is on.
 *
 * When BC_DEBUG is nonzero, the expansion also passes __FILE__ and __LINE__ of
 * the call site to bc_vm_handleError(), ahead of the lexer's line.
 * @param l    The lexer. Must be a pointer, since the macro reads @a l->line.
 * @param e    The error.
 * @param ...  Extra arguments forwarded, unchanged, to bc_vm_handleError()
 *             after the line. At least one must be given because the
 *             expansion puts a comma before __VA_ARGS__; use bc_lex_err() when
 *             there are none.
 */
#if BC_DEBUG
#define bc_lex_verr(l, e, ...) \
	(bc_vm_handleError((e), __FILE__, __LINE__, (l)->line, __VA_ARGS__))
#else // BC_DEBUG
#define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__))
#endif // BC_DEBUG
70

71
// BC_LEX_NEG_CHAR returns the char that corresponds to negative for the
// current calculator.
//
// BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid
// char for numbers. In bc and dc, capital letters are part of numbers, to a
// point. (dc only goes up to hex, so its last valid char is 'F'.)
//
// When both calculators are built, the value is picked by BC_IS_BC (defined
// outside this header); otherwise it is a constant for the one that is built.
#if BC_ENABLED

#if DC_ENABLED
#define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_')
#define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F')
#else // DC_ENABLED
#define BC_LEX_NEG_CHAR ('-')
#define BC_LEX_LAST_NUM_CHAR ('Z')
#endif // DC_ENABLED

#else // BC_ENABLED

#define BC_LEX_NEG_CHAR ('_')
#define BC_LEX_LAST_NUM_CHAR ('F')

#endif // BC_ENABLED
93

94
/**
 * Returns true if c is a valid number character: a decimal digit, a capital
 * letter from 'A' through BC_LEX_LAST_NUM_CHAR, or a '.' when no decimal point
 * has been seen yet and non-integers are allowed.
 *
 * The character is converted to unsigned char before it is handed to
 * isdigit(); passing a negative char value to a <ctype.h> function is
 * undefined behavior.
 *
 * Note that @a c is evaluated more than once, so it must not have side
 * effects. isdigit() comes from <ctype.h>, which is not among this header's
 * direct includes, so it must be in scope wherever the macro is used.
 * @param c         The char to check.
 * @param pt        If a decimal point has already been seen.
 * @param int_only  True if the number is expected to be an int only, false if
 *                  non-integers are allowed.
 * @return          True if @a c is a valid number character.
 */
#define BC_LEX_NUM_CHAR(c, pt, int_only)              \
	(isdigit((unsigned char) (c)) != 0 ||             \
	 ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) ||   \
	 ((c) == '.' && !(pt) && !(int_only)))
105

106
/// An enum of lex token types. The enumerators are not given explicit values,
/// so each token's value depends on its position and on which of BC_ENABLED,
/// DC_ENABLED, and BC_ENABLE_EXTRA_MATH are set.
typedef enum BcLexType
{
	/// End of file.
	BC_LEX_EOF,

	/// Marker for invalid tokens, used by bc and dc for const data.
	BC_LEX_INVALID,

#if BC_ENABLED

	/// Increment operator.
	BC_LEX_OP_INC,

	/// Decrement operator.
	BC_LEX_OP_DEC,

#endif // BC_ENABLED

	/// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer
	/// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be
	/// able to distinguish them.
	BC_LEX_NEG,

	/// Boolean not.
	BC_LEX_OP_BOOL_NOT,

#if BC_ENABLE_EXTRA_MATH

	/// Truncation operator.
	BC_LEX_OP_TRUNC,

#endif // BC_ENABLE_EXTRA_MATH

	/// Power operator.
	BC_LEX_OP_POWER,

	/// Multiplication operator.
	BC_LEX_OP_MULTIPLY,

	/// Division operator.
	BC_LEX_OP_DIVIDE,

	/// Modulus operator.
	BC_LEX_OP_MODULUS,

	/// Addition operator.
	BC_LEX_OP_PLUS,

	/// Subtraction operator.
	BC_LEX_OP_MINUS,

#if BC_ENABLE_EXTRA_MATH

	/// Places (truncate or extend) operator.
	BC_LEX_OP_PLACES,

	/// Left (decimal) shift operator.
	BC_LEX_OP_LSHIFT,

	/// Right (decimal) shift operator.
	BC_LEX_OP_RSHIFT,

#endif // BC_ENABLE_EXTRA_MATH

	/// Equal operator.
	BC_LEX_OP_REL_EQ,

	/// Less than or equal operator.
	BC_LEX_OP_REL_LE,

	/// Greater than or equal operator.
	BC_LEX_OP_REL_GE,

	/// Not equal operator.
	BC_LEX_OP_REL_NE,

	/// Less than operator.
	BC_LEX_OP_REL_LT,

	/// Greater than operator.
	BC_LEX_OP_REL_GT,

	/// Boolean or operator.
	BC_LEX_OP_BOOL_OR,

	/// Boolean and operator.
	BC_LEX_OP_BOOL_AND,

#if BC_ENABLED

	/// Power assignment operator.
	BC_LEX_OP_ASSIGN_POWER,

	/// Multiplication assignment operator.
	BC_LEX_OP_ASSIGN_MULTIPLY,

	/// Division assignment operator.
	BC_LEX_OP_ASSIGN_DIVIDE,

	/// Modulus assignment operator.
	BC_LEX_OP_ASSIGN_MODULUS,

	/// Addition assignment operator.
	BC_LEX_OP_ASSIGN_PLUS,

	/// Subtraction assignment operator.
	BC_LEX_OP_ASSIGN_MINUS,

#if BC_ENABLE_EXTRA_MATH

	/// Places (truncate or extend) assignment operator.
	BC_LEX_OP_ASSIGN_PLACES,

	/// Left (decimal) shift assignment operator.
	BC_LEX_OP_ASSIGN_LSHIFT,

	/// Right (decimal) shift assignment operator.
	BC_LEX_OP_ASSIGN_RSHIFT,

#endif // BC_ENABLE_EXTRA_MATH
#endif // BC_ENABLED

	/// Assignment operator.
	BC_LEX_OP_ASSIGN,

	/// Newline.
	BC_LEX_NLINE,

	/// Whitespace.
	BC_LEX_WHITESPACE,

	/// Left parenthesis.
	BC_LEX_LPAREN,

	/// Right parenthesis.
	BC_LEX_RPAREN,

	/// Left bracket.
	BC_LEX_LBRACKET,

	/// Comma.
	BC_LEX_COMMA,

	/// Right bracket.
	BC_LEX_RBRACKET,

	/// Left brace.
	BC_LEX_LBRACE,

	/// Semicolon.
	BC_LEX_SCOLON,

	/// Right brace.
	BC_LEX_RBRACE,

	/// String.
	BC_LEX_STR,

	/// Identifier/name.
	BC_LEX_NAME,

	/// Constant number.
	BC_LEX_NUMBER,

	// These keywords are in the order they are in for a reason. Don't change
	// the order unless you want a bunch of weird failures in the test suite.
	// In fact, almost all of these tokens are in a specific order for a reason.

#if BC_ENABLED

	/// bc auto keyword.
	BC_LEX_KW_AUTO,

	/// bc break keyword.
	BC_LEX_KW_BREAK,

	/// bc continue keyword.
	BC_LEX_KW_CONTINUE,

	/// bc define keyword.
	BC_LEX_KW_DEFINE,

	/// bc for keyword.
	BC_LEX_KW_FOR,

	/// bc if keyword.
	BC_LEX_KW_IF,

	/// bc limits keyword.
	BC_LEX_KW_LIMITS,

	/// bc return keyword.
	BC_LEX_KW_RETURN,

	/// bc while keyword.
	BC_LEX_KW_WHILE,

	/// bc halt keyword.
	BC_LEX_KW_HALT,

	/// bc last keyword.
	BC_LEX_KW_LAST,

#endif // BC_ENABLED

	/// bc ibase keyword.
	BC_LEX_KW_IBASE,

	/// bc obase keyword.
	BC_LEX_KW_OBASE,

	/// bc scale keyword.
	BC_LEX_KW_SCALE,

#if BC_ENABLE_EXTRA_MATH

	/// bc seed keyword.
	BC_LEX_KW_SEED,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc length keyword.
	BC_LEX_KW_LENGTH,

	/// bc print keyword.
	BC_LEX_KW_PRINT,

	/// bc sqrt keyword.
	BC_LEX_KW_SQRT,

	/// bc abs keyword.
	BC_LEX_KW_ABS,

	/// bc is_number keyword.
	BC_LEX_KW_IS_NUMBER,

	/// bc is_string keyword.
	BC_LEX_KW_IS_STRING,

#if BC_ENABLE_EXTRA_MATH

	/// bc irand keyword.
	BC_LEX_KW_IRAND,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc asciify keyword.
	BC_LEX_KW_ASCIIFY,

	/// bc modexp keyword.
	BC_LEX_KW_MODEXP,

	/// bc divmod keyword.
	BC_LEX_KW_DIVMOD,

	/// bc quit keyword.
	BC_LEX_KW_QUIT,

	/// bc read keyword.
	BC_LEX_KW_READ,

#if BC_ENABLE_EXTRA_MATH

	/// bc rand keyword.
	BC_LEX_KW_RAND,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc maxibase keyword.
	BC_LEX_KW_MAXIBASE,

	/// bc maxobase keyword.
	BC_LEX_KW_MAXOBASE,

	/// bc maxscale keyword.
	BC_LEX_KW_MAXSCALE,

#if BC_ENABLE_EXTRA_MATH

	/// bc maxrand keyword.
	BC_LEX_KW_MAXRAND,

#endif // BC_ENABLE_EXTRA_MATH

	/// bc line_length keyword.
	BC_LEX_KW_LINE_LENGTH,

#if BC_ENABLED

	/// bc global_stacks keyword.
	BC_LEX_KW_GLOBAL_STACKS,

#endif // BC_ENABLED

	/// bc leading_zero keyword.
	BC_LEX_KW_LEADING_ZERO,

	/// bc stream keyword.
	BC_LEX_KW_STREAM,

	/// bc else keyword.
	BC_LEX_KW_ELSE,

#if DC_ENABLED

	/// dc extended registers keyword.
	BC_LEX_EXTENDED_REGISTERS,

	/// A special token for dc to calculate equal without a register.
	BC_LEX_EQ_NO_REG,

	/// Colon (array) operator.
	BC_LEX_COLON,

	/// Execute command.
	BC_LEX_EXECUTE,

	/// Print stack command.
	BC_LEX_PRINT_STACK,

	/// Clear stack command.
	BC_LEX_CLEAR_STACK,

	/// Register stack level command.
	BC_LEX_REG_STACK_LEVEL,

	/// Main stack level command.
	BC_LEX_STACK_LEVEL,

	/// Duplicate command.
	BC_LEX_DUPLICATE,

	/// Swap (reverse) command.
	BC_LEX_SWAP,

	/// Pop (remove) command.
	BC_LEX_POP,

	/// Store ibase command.
	BC_LEX_STORE_IBASE,

	/// Store obase command.
	BC_LEX_STORE_OBASE,

	/// Store scale command.
	BC_LEX_STORE_SCALE,

#if BC_ENABLE_EXTRA_MATH

	/// Store seed command.
	BC_LEX_STORE_SEED,

#endif // BC_ENABLE_EXTRA_MATH

	/// Load variable onto stack command.
	BC_LEX_LOAD,

	/// Pop off of variable stack onto results stack command.
	BC_LEX_LOAD_POP,

	/// Push onto variable stack command.
	BC_LEX_STORE_PUSH,

	/// Print with pop command.
	BC_LEX_PRINT_POP,

	/// Parameterized quit command.
	BC_LEX_NQUIT,

	/// Execution stack depth command.
	BC_LEX_EXEC_STACK_LENGTH,

	/// Scale of number command. This is needed specifically for dc because bc
	/// parses the scale function in parts.
	BC_LEX_SCALE_FACTOR,

	/// Array length command. This is needed specifically for dc because bc
	/// just reuses its length keyword.
	BC_LEX_ARRAY_LENGTH,

#endif // DC_ENABLED

} BcLexType;
490

491
// Forward declaration so that BcLexNext can refer to the lexer before the
// struct itself is defined.
struct BcLex;

/**
 * A function pointer to call when another token is needed. Mostly called by the
 * parser.
 * @param l  The lexer.
 */
typedef void (*BcLexNext)(struct BcLex* l);
499

500
/// The lexer.
501
typedef struct BcLex
502
{
503
	/// A pointer to the text to lex.
504
	const char* buf;
505

506
	/// The current index into buf.
507
	size_t i;
508

509
	/// The current line.
510
	size_t line;
511

512
	/// The length of buf.
513
	size_t len;
514

515
	/// The current token.
516
	BcLexType t;
517

518
	/// The previous token.
519
	BcLexType last;
520

521
	/// A string to store extra data for tokens. For example, the @a BC_LEX_STR
522
	/// token really needs to store the actual string, and numbers also need the
523
	/// string.
524
	BcVec str;
525

526
	/// The mode the lexer is in.
527
	BcMode mode;
528

529
} BcLex;
530

531
/**
532
 * Initializes a lexer.
533
 * @param l  The lexer to initialize.
534
 */
535
void
536
bc_lex_init(BcLex* l);
537

538
/**
539
 * Frees a lexer. This is not guarded by #if BC_DEBUG because a separate
540
 * parser is created at runtime to parse read() expressions and dc strings, and
541
 * that parser needs a lexer.
542
 * @param l  The lexer to free.
543
 */
544
void
545
bc_lex_free(BcLex* l);
546

547
/**
548
 * Sets the filename that the lexer will be lexing.
549
 * @param l     The lexer.
550
 * @param file  The filename that the lexer will lex.
551
 */
552
void
553
bc_lex_file(BcLex* l, const char* file);
554

555
/**
556
 * Sets the text the lexer will lex.
557
 * @param l     The lexer.
558
 * @param text  The text to lex.
559
 * @param mode  The mode to lex in.
560
 */
561
void
562
bc_lex_text(BcLex* l, const char* text, BcMode mode);
563

564
/**
565
 * Generic next function for the parser to call. It takes care of calling the
566
 * correct @a BcLexNext function and consuming whitespace.
567
 * @param l  The lexer.
568
 */
569
void
570
bc_lex_next(BcLex* l);
571

572
/**
573
 * Lexes a line comment (one beginning with '#' and going to a newline).
574
 * @param l  The lexer.
575
 */
576
void
577
bc_lex_lineComment(BcLex* l);
578

579
/**
580
 * Lexes a general comment (C-style comment).
581
 * @param l  The lexer.
582
 */
583
void
584
bc_lex_comment(BcLex* l);
585

586
/**
587
 * Lexes whitespace, finding as much as possible.
588
 * @param l  The lexer.
589
 */
590
void
591
bc_lex_whitespace(BcLex* l);
592

593
/**
594
 * Lexes a number that begins with char @a start. This takes care of parsing
595
 * numbers in scientific and engineering notations.
596
 * @param l      The lexer.
597
 * @param start  The starting char of the number. To detect a number and call
598
 *               this function, the lexer had to eat the first char. It fixes
599
 *               that by passing it in.
600
 */
601
void
602
bc_lex_number(BcLex* l, char start);
603

604
/**
605
 * Lexes a name/identifier.
606
 * @param l  The lexer.
607
 */
608
void
609
bc_lex_name(BcLex* l);
610

611
/**
612
 * Lexes common whitespace characters.
613
 * @param l  The lexer.
614
 * @param c  The character to lex.
615
 */
616
void
617
bc_lex_commonTokens(BcLex* l, char c);
618

619
/**
620
 * Throws a parse error because char @a c was invalid.
621
 * @param l  The lexer.
622
 * @param c  The problem character.
623
 */
624
void
625
bc_lex_invalidChar(BcLex* l, char c);
626

627
/**
628
 * Reads a line from stdin and puts it into the lexer's buffer.
629
 * @param l  The lexer.
630
 */
631
bool
632
bc_lex_readLine(BcLex* l);
633

634
#endif // BC_LEX_H
635

636
Product

Resources

Company