Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/bc/include/lex.h
39481 views
1
/*
 * *****************************************************************************
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018-2025 Gavin D. Howard and contributors.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * *****************************************************************************
 *
 * Definitions for bc's lexer.
 *
 */
35
36
#ifndef BC_LEX_H
37
#define BC_LEX_H
38
39
#include <stdbool.h>
40
#include <stddef.h>
41
42
#include <status.h>
43
#include <vector.h>
44
#include <lang.h>
45
46
/**
 * Convenience wrapper for raising an error from lexer code. It forwards the
 * error to bc_vm_handleError() together with the line the lexer is currently
 * on, so call sites do not have to pass that plumbing themselves. When
 * BC_DEBUG is set, the __FILE__ and __LINE__ of the call site are forwarded
 * as well.
 * @param l  The lexer (a pointer; its line field is read).
 * @param e  The error.
 */
#if BC_DEBUG
#define bc_lex_err(l, e) \
    (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line))
#else // BC_DEBUG
#define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line))
#endif // BC_DEBUG
57
58
/**
 * Variadic counterpart of bc_lex_err(): raises an error from lexer code and
 * forwards extra arguments to bc_vm_handleError() after the lexer's current
 * line. When BC_DEBUG is set, the __FILE__ and __LINE__ of the call site are
 * forwarded as well.
 *
 * The expansion places a comma before __VA_ARGS__, so at least one extra
 * argument must be supplied; use bc_lex_err() when there are none.
 * @param l    The lexer (a pointer; its line field is read).
 * @param e    The error.
 * @param ...  Extra arguments passed through unchanged to
 *             bc_vm_handleError().
 */
#if BC_DEBUG
#define bc_lex_verr(l, e, ...) \
    (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line, __VA_ARGS__))
#else // BC_DEBUG
#define bc_lex_verr(l, e, ...) \
    (bc_vm_handleError((e), (l)->line, __VA_ARGS__))
#endif // BC_DEBUG
70
71
// BC_LEX_NEG_CHAR is the char that corresponds to negative for the current
// calculator.
//
// BC_LEX_LAST_NUM_CHAR is the last valid char for numbers. In bc and dc,
// capital letters are part of numbers, to a point. (dc only goes up to hex, so
// its last valid char is 'F'.)
#if BC_ENABLED && DC_ENABLED

// Both calculators are built in; BC_IS_BC selects between them.
#define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_')
#define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F')

#elif BC_ENABLED

// Only bc is built in.
#define BC_LEX_NEG_CHAR ('-')
#define BC_LEX_LAST_NUM_CHAR ('Z')

#else // BC_ENABLED

// bc is disabled, so dc's characters are used unconditionally.
#define BC_LEX_NEG_CHAR ('_')
#define BC_LEX_LAST_NUM_CHAR ('F')

#endif // BC_ENABLED
93
94
/**
 * Returns true if c is a valid number character: a decimal digit, a capital
 * letter from 'A' through BC_LEX_LAST_NUM_CHAR, or a '.' when no decimal point
 * has been seen yet and non-integers are allowed.
 *
 * @a c is evaluated more than once, so it must not have side effects. It is
 * converted to unsigned char before being handed to isdigit() because passing
 * a negative char value to the <ctype.h> functions is undefined behavior.
 * @param c         The char to check.
 * @param pt        If a decimal point has already been seen.
 * @param int_only  True if the number is expected to be an int only, false if
 *                  non-integers are allowed.
 * @return          True if @a c is a valid number character.
 */
#define BC_LEX_NUM_CHAR(c, pt, int_only)              \
    (isdigit((unsigned char) (c)) != 0 ||             \
     ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) ||   \
     ((c) == '.' && !(pt) && !(int_only)))
105
106
/// An enum of lex token types. The order of the enumerators is significant
/// (see the note before the keywords), so new tokens must not be inserted
/// casually. Which enumerators exist depends on BC_ENABLED, DC_ENABLED, and
/// BC_ENABLE_EXTRA_MATH.
typedef enum BcLexType
{
    /// End of file.
    BC_LEX_EOF,

    /// Marker for invalid tokens, used by bc and dc for const data.
    BC_LEX_INVALID,

#if BC_ENABLED

    /// Increment operator.
    BC_LEX_OP_INC,

    /// Decrement operator.
    BC_LEX_OP_DEC,

#endif // BC_ENABLED

    /// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer
    /// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be
    /// able to distinguish them.
    BC_LEX_NEG,

    /// Boolean not.
    BC_LEX_OP_BOOL_NOT,

#if BC_ENABLE_EXTRA_MATH

    /// Truncation operator.
    BC_LEX_OP_TRUNC,

#endif // BC_ENABLE_EXTRA_MATH

    /// Power operator.
    BC_LEX_OP_POWER,

    /// Multiplication operator.
    BC_LEX_OP_MULTIPLY,

    /// Division operator.
    BC_LEX_OP_DIVIDE,

    /// Modulus operator.
    BC_LEX_OP_MODULUS,

    /// Addition operator.
    BC_LEX_OP_PLUS,

    /// Subtraction operator.
    BC_LEX_OP_MINUS,

#if BC_ENABLE_EXTRA_MATH

    /// Places (truncate or extend) operator.
    BC_LEX_OP_PLACES,

    /// Left (decimal) shift operator.
    BC_LEX_OP_LSHIFT,

    /// Right (decimal) shift operator.
    BC_LEX_OP_RSHIFT,

#endif // BC_ENABLE_EXTRA_MATH

    /// Equal operator.
    BC_LEX_OP_REL_EQ,

    /// Less than or equal operator.
    BC_LEX_OP_REL_LE,

    /// Greater than or equal operator.
    BC_LEX_OP_REL_GE,

    /// Not equal operator.
    BC_LEX_OP_REL_NE,

    /// Less than operator.
    BC_LEX_OP_REL_LT,

    /// Greater than operator.
    BC_LEX_OP_REL_GT,

    /// Boolean or operator.
    BC_LEX_OP_BOOL_OR,

    /// Boolean and operator.
    BC_LEX_OP_BOOL_AND,

#if BC_ENABLED

    /// Power assignment operator.
    BC_LEX_OP_ASSIGN_POWER,

    /// Multiplication assignment operator.
    BC_LEX_OP_ASSIGN_MULTIPLY,

    /// Division assignment operator.
    BC_LEX_OP_ASSIGN_DIVIDE,

    /// Modulus assignment operator.
    BC_LEX_OP_ASSIGN_MODULUS,

    /// Addition assignment operator.
    BC_LEX_OP_ASSIGN_PLUS,

    /// Subtraction assignment operator.
    BC_LEX_OP_ASSIGN_MINUS,

#if BC_ENABLE_EXTRA_MATH

    /// Places (truncate or extend) assignment operator.
    BC_LEX_OP_ASSIGN_PLACES,

    /// Left (decimal) shift assignment operator.
    BC_LEX_OP_ASSIGN_LSHIFT,

    /// Right (decimal) shift assignment operator.
    BC_LEX_OP_ASSIGN_RSHIFT,

#endif // BC_ENABLE_EXTRA_MATH
#endif // BC_ENABLED

    /// Assignment operator.
    BC_LEX_OP_ASSIGN,

    /// Newline.
    BC_LEX_NLINE,

    /// Whitespace.
    BC_LEX_WHITESPACE,

    /// Left parenthesis.
    BC_LEX_LPAREN,

    /// Right parenthesis.
    BC_LEX_RPAREN,

    /// Left bracket.
    BC_LEX_LBRACKET,

    /// Comma.
    BC_LEX_COMMA,

    /// Right bracket.
    BC_LEX_RBRACKET,

    /// Left brace.
    BC_LEX_LBRACE,

    /// Semicolon.
    BC_LEX_SCOLON,

    /// Right brace.
    BC_LEX_RBRACE,

    /// String.
    BC_LEX_STR,

    /// Identifier/name.
    BC_LEX_NAME,

    /// Constant number.
    BC_LEX_NUMBER,

    // These keywords are in the order they are in for a reason. Don't change
    // the order unless you want a bunch of weird failures in the test suite.
    // In fact, almost all of these tokens are in a specific order for a reason.

#if BC_ENABLED

    /// bc auto keyword.
    BC_LEX_KW_AUTO,

    /// bc break keyword.
    BC_LEX_KW_BREAK,

    /// bc continue keyword.
    BC_LEX_KW_CONTINUE,

    /// bc define keyword.
    BC_LEX_KW_DEFINE,

    /// bc for keyword.
    BC_LEX_KW_FOR,

    /// bc if keyword.
    BC_LEX_KW_IF,

    /// bc limits keyword.
    BC_LEX_KW_LIMITS,

    /// bc return keyword.
    BC_LEX_KW_RETURN,

    /// bc while keyword.
    BC_LEX_KW_WHILE,

    /// bc halt keyword.
    BC_LEX_KW_HALT,

    /// bc last keyword.
    BC_LEX_KW_LAST,

#endif // BC_ENABLED

    /// bc ibase keyword.
    BC_LEX_KW_IBASE,

    /// bc obase keyword.
    BC_LEX_KW_OBASE,

    /// bc scale keyword.
    BC_LEX_KW_SCALE,

#if BC_ENABLE_EXTRA_MATH

    /// bc seed keyword.
    BC_LEX_KW_SEED,

#endif // BC_ENABLE_EXTRA_MATH

    /// bc length keyword.
    BC_LEX_KW_LENGTH,

    /// bc print keyword.
    BC_LEX_KW_PRINT,

    /// bc sqrt keyword.
    BC_LEX_KW_SQRT,

    /// bc abs keyword.
    BC_LEX_KW_ABS,

    /// bc is_number keyword.
    BC_LEX_KW_IS_NUMBER,

    /// bc is_string keyword.
    BC_LEX_KW_IS_STRING,

#if BC_ENABLE_EXTRA_MATH

    /// bc irand keyword.
    BC_LEX_KW_IRAND,

#endif // BC_ENABLE_EXTRA_MATH

    /// bc asciify keyword.
    BC_LEX_KW_ASCIIFY,

    /// bc modexp keyword.
    BC_LEX_KW_MODEXP,

    /// bc divmod keyword.
    BC_LEX_KW_DIVMOD,

    /// bc quit keyword.
    BC_LEX_KW_QUIT,

    /// bc read keyword.
    BC_LEX_KW_READ,

#if BC_ENABLE_EXTRA_MATH

    /// bc rand keyword.
    BC_LEX_KW_RAND,

#endif // BC_ENABLE_EXTRA_MATH

    /// bc maxibase keyword.
    BC_LEX_KW_MAXIBASE,

    /// bc maxobase keyword.
    BC_LEX_KW_MAXOBASE,

    /// bc maxscale keyword.
    BC_LEX_KW_MAXSCALE,

#if BC_ENABLE_EXTRA_MATH

    /// bc maxrand keyword.
    BC_LEX_KW_MAXRAND,

#endif // BC_ENABLE_EXTRA_MATH

    /// bc line_length keyword.
    BC_LEX_KW_LINE_LENGTH,

#if BC_ENABLED

    /// bc global_stacks keyword.
    BC_LEX_KW_GLOBAL_STACKS,

#endif // BC_ENABLED

    /// bc leading_zero keyword.
    BC_LEX_KW_LEADING_ZERO,

    /// bc stream keyword.
    BC_LEX_KW_STREAM,

    /// bc else keyword.
    BC_LEX_KW_ELSE,

#if DC_ENABLED

    /// dc extended registers keyword.
    BC_LEX_EXTENDED_REGISTERS,

    /// A special token for dc to calculate equal without a register.
    BC_LEX_EQ_NO_REG,

    /// Colon (array) operator.
    BC_LEX_COLON,

    /// Execute command.
    BC_LEX_EXECUTE,

    /// Print stack command.
    BC_LEX_PRINT_STACK,

    /// Clear stack command.
    BC_LEX_CLEAR_STACK,

    /// Register stack level command.
    BC_LEX_REG_STACK_LEVEL,

    /// Main stack level command.
    BC_LEX_STACK_LEVEL,

    /// Duplicate command.
    BC_LEX_DUPLICATE,

    /// Swap (reverse) command.
    BC_LEX_SWAP,

    /// Pop (remove) command.
    BC_LEX_POP,

    /// Store ibase command.
    BC_LEX_STORE_IBASE,

    /// Store obase command.
    BC_LEX_STORE_OBASE,

    /// Store scale command.
    BC_LEX_STORE_SCALE,

#if BC_ENABLE_EXTRA_MATH

    /// Store seed command.
    BC_LEX_STORE_SEED,

#endif // BC_ENABLE_EXTRA_MATH

    /// Load variable onto stack command.
    BC_LEX_LOAD,

    /// Pop off of variable stack onto results stack command.
    BC_LEX_LOAD_POP,

    /// Push onto variable stack command.
    BC_LEX_STORE_PUSH,

    /// Print with pop command.
    BC_LEX_PRINT_POP,

    /// Parameterized quit command.
    BC_LEX_NQUIT,

    /// Execution stack depth command.
    BC_LEX_EXEC_STACK_LENGTH,

    /// Scale of number command. This is needed specifically for dc because bc
    /// parses the scale function in parts.
    BC_LEX_SCALE_FACTOR,

    /// Array length command. This is needed specifically for dc because bc
    /// just reuses its length keyword.
    BC_LEX_ARRAY_LENGTH,

#endif // DC_ENABLED

} BcLexType;
490
491
// Forward declaration so that BcLexNext can refer to the lexer before the
// struct itself is defined.
struct BcLex;

/**
 * A function pointer to call when another token is needed. Mostly called by the
 * parser.
 * @param l  The lexer.
 */
typedef void (*BcLexNext)(struct BcLex* l);
499
500
/// The lexer.
501
typedef struct BcLex
502
{
503
/// A pointer to the text to lex.
504
const char* buf;
505
506
/// The current index into buf.
507
size_t i;
508
509
/// The current line.
510
size_t line;
511
512
/// The length of buf.
513
size_t len;
514
515
/// The current token.
516
BcLexType t;
517
518
/// The previous token.
519
BcLexType last;
520
521
/// A string to store extra data for tokens. For example, the @a BC_LEX_STR
522
/// token really needs to store the actual string, and numbers also need the
523
/// string.
524
BcVec str;
525
526
/// The mode the lexer is in.
527
BcMode mode;
528
529
} BcLex;
530
531
/**
532
* Initializes a lexer.
533
* @param l The lexer to initialize.
534
*/
535
void
536
bc_lex_init(BcLex* l);
537
538
/**
539
* Frees a lexer. This is not guarded by #if BC_DEBUG because a separate
540
* parser is created at runtime to parse read() expressions and dc strings, and
541
* that parser needs a lexer.
542
* @param l The lexer to free.
543
*/
544
void
545
bc_lex_free(BcLex* l);
546
547
/**
548
* Sets the filename that the lexer will be lexing.
549
* @param l The lexer.
550
* @param file The filename that the lexer will lex.
551
*/
552
void
553
bc_lex_file(BcLex* l, const char* file);
554
555
/**
556
* Sets the text the lexer will lex.
557
* @param l The lexer.
558
* @param text The text to lex.
559
* @param mode The mode to lex in.
560
*/
561
void
562
bc_lex_text(BcLex* l, const char* text, BcMode mode);
563
564
/**
565
* Generic next function for the parser to call. It takes care of calling the
566
* correct @a BcLexNext function and consuming whitespace.
567
* @param l The lexer.
568
*/
569
void
570
bc_lex_next(BcLex* l);
571
572
/**
573
* Lexes a line comment (one beginning with '#' and going to a newline).
574
* @param l The lexer.
575
*/
576
void
577
bc_lex_lineComment(BcLex* l);
578
579
/**
580
* Lexes a general comment (C-style comment).
581
* @param l The lexer.
582
*/
583
void
584
bc_lex_comment(BcLex* l);
585
586
/**
587
* Lexes whitespace, finding as much as possible.
588
* @param l The lexer.
589
*/
590
void
591
bc_lex_whitespace(BcLex* l);
592
593
/**
594
* Lexes a number that begins with char @a start. This takes care of parsing
595
* numbers in scientific and engineering notations.
596
* @param l The lexer.
597
* @param start The starting char of the number. To detect a number and call
598
* this function, the lexer had to eat the first char. It fixes
599
* that by passing it in.
600
*/
601
void
602
bc_lex_number(BcLex* l, char start);
603
604
/**
605
* Lexes a name/identifier.
606
* @param l The lexer.
607
*/
608
void
609
bc_lex_name(BcLex* l);
610
611
/**
612
* Lexes common whitespace characters.
613
* @param l The lexer.
614
* @param c The character to lex.
615
*/
616
void
617
bc_lex_commonTokens(BcLex* l, char c);
618
619
/**
620
* Throws a parse error because char @a c was invalid.
621
* @param l The lexer.
622
* @param c The problem character.
623
*/
624
void
625
bc_lex_invalidChar(BcLex* l, char c);
626
627
/**
628
* Reads a line from stdin and puts it into the lexer's buffer.
629
* @param l The lexer.
630
*/
631
bool
632
bc_lex_readLine(BcLex* l);
633
634
#endif // BC_LEX_H
635
636