Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/bc/src/lex.c
39507 views
1
/*
2
* *****************************************************************************
3
*
4
* SPDX-License-Identifier: BSD-2-Clause
5
*
6
* Copyright (c) 2018-2025 Gavin D. Howard and contributors.
7
*
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions are met:
10
*
11
* * Redistributions of source code must retain the above copyright notice, this
12
* list of conditions and the following disclaimer.
13
*
14
* * Redistributions in binary form must reproduce the above copyright notice,
15
* this list of conditions and the following disclaimer in the documentation
16
* and/or other materials provided with the distribution.
17
*
18
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
* POSSIBILITY OF SUCH DAMAGE.
29
*
30
* *****************************************************************************
31
*
32
* Common code for the lexers.
33
*
34
*/
35
36
#include <assert.h>
37
#include <ctype.h>
38
#include <stdbool.h>
39
#include <string.h>
40
41
#include <lex.h>
42
#include <vm.h>
43
#include <bc.h>
44
45
void
46
bc_lex_invalidChar(BcLex* l, char c)
47
{
48
l->t = BC_LEX_INVALID;
49
bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
50
}
51
52
void
53
bc_lex_lineComment(BcLex* l)
54
{
55
l->t = BC_LEX_WHITESPACE;
56
while (l->i < l->len && l->buf[l->i] != '\n')
57
{
58
l->i += 1;
59
}
60
}
61
62
void
63
bc_lex_comment(BcLex* l)
64
{
65
size_t i, nlines = 0;
66
const char* buf;
67
bool end = false, got_more;
68
char c;
69
70
l->i += 1;
71
l->t = BC_LEX_WHITESPACE;
72
73
// This loop is complex because it might need to request more data from
74
// stdin if the comment is not ended. This loop is taken until the comment
75
// is finished or we have EOF.
76
do
77
{
78
buf = l->buf;
79
got_more = false;
80
81
// If we are in stdin mode, the buffer must be the one used for stdin.
82
#if !BC_ENABLE_OSSFUZZ
83
assert(vm->mode != BC_MODE_STDIN || buf == vm->buffer.v);
84
#endif // !BC_ENABLE_OSSFUZZ
85
86
// Find the end of the comment.
87
for (i = l->i; !end; i += !end)
88
{
89
// While we don't have an asterisk, eat, but increment nlines.
90
for (; (c = buf[i]) && c != '*'; ++i)
91
{
92
nlines += (c == '\n');
93
}
94
95
// If this is true, we need to request more data.
96
if (BC_ERR(!c || buf[i + 1] == '\0'))
97
{
98
#if !BC_ENABLE_OSSFUZZ
99
// Read more, if possible.
100
if (!vm->eof && l->mode != BC_MODE_FILE)
101
{
102
got_more = bc_lex_readLine(l);
103
}
104
#endif // !BC_ENABLE_OSSFUZZ
105
106
break;
107
}
108
109
// If this turns true, we found the end. Yay!
110
end = (buf[i + 1] == '/');
111
}
112
}
113
while (got_more && !end);
114
115
// If we didn't find the end, barf.
116
if (!end)
117
{
118
l->i = i;
119
bc_lex_err(l, BC_ERR_PARSE_COMMENT);
120
}
121
122
l->i = i + 2;
123
l->line += nlines;
124
}
125
126
void
127
bc_lex_whitespace(BcLex* l)
128
{
129
char c;
130
131
l->t = BC_LEX_WHITESPACE;
132
133
// Eat. We don't eat newlines because they can be special.
134
for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i])
135
{
136
continue;
137
}
138
}
139
140
void
141
bc_lex_commonTokens(BcLex* l, char c)
142
{
143
if (!c) l->t = BC_LEX_EOF;
144
else if (c == '\n') l->t = BC_LEX_NLINE;
145
else bc_lex_whitespace(l);
146
}
147
148
/**
149
* Parses a number.
150
* @param l The lexer.
151
* @param start The start character.
152
* @param int_only Whether this function should only look for an integer. This
153
* is used to implement the exponent of scientific notation.
154
*/
155
static size_t
156
bc_lex_num(BcLex* l, char start, bool int_only)
157
{
158
const char* buf = l->buf + l->i;
159
size_t i;
160
char c;
161
bool last_pt, pt = (start == '.');
162
163
// This loop looks complex. It is not. It is asking if the character is not
164
// a nul byte and it if it a valid num character based on what we have found
165
// thus far, or whether it is a backslash followed by a newline. I can do
166
// i+1 on the buffer because the buffer must have a nul byte.
167
for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) ||
168
(c == '\\' && buf[i + 1] == '\n'));
169
++i)
170
{
171
// I don't need to test that the next character is a newline because
172
// the loop condition above ensures that.
173
if (c == '\\')
174
{
175
i += 2;
176
177
// Make sure to eat whitespace at the beginning of the line.
178
while (isspace(buf[i]) && buf[i] != '\n')
179
{
180
i += 1;
181
}
182
183
c = buf[i];
184
185
// If the next character is not a number character, bail.
186
if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break;
187
}
188
189
// Did we find the radix point?
190
last_pt = (c == '.');
191
192
// If we did, and we already have one, then break because it's not part
193
// of this number.
194
if (pt && last_pt) break;
195
196
// Set whether we have found a radix point.
197
pt = pt || last_pt;
198
199
bc_vec_push(&l->str, &c);
200
}
201
202
return i;
203
}
204
205
void
206
bc_lex_number(BcLex* l, char start)
207
{
208
l->t = BC_LEX_NUMBER;
209
210
// Make sure the string is clear.
211
bc_vec_popAll(&l->str);
212
bc_vec_push(&l->str, &start);
213
214
// Parse the number.
215
l->i += bc_lex_num(l, start, false);
216
217
#if BC_ENABLE_EXTRA_MATH
218
{
219
char c = l->buf[l->i];
220
221
// Do we have a number in scientific notation?
222
if (c == 'e')
223
{
224
#if BC_ENABLED
225
// Barf for POSIX.
226
if (BC_IS_POSIX) bc_lex_err(l, BC_ERR_POSIX_EXP_NUM);
227
#endif // BC_ENABLED
228
229
// Push the e.
230
bc_vec_push(&l->str, &c);
231
l->i += 1;
232
c = l->buf[l->i];
233
234
// Check for negative specifically because bc_lex_num() does not.
235
if (c == BC_LEX_NEG_CHAR)
236
{
237
bc_vec_push(&l->str, &c);
238
l->i += 1;
239
c = l->buf[l->i];
240
}
241
242
// We must have a number character, so barf if not.
243
if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true)))
244
{
245
bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
246
}
247
248
// Parse the exponent.
249
l->i += bc_lex_num(l, 0, true);
250
}
251
}
252
#endif // BC_ENABLE_EXTRA_MATH
253
254
bc_vec_pushByte(&l->str, '\0');
255
}
256
257
void
258
bc_lex_name(BcLex* l)
259
{
260
size_t i = 0;
261
const char* buf = l->buf + l->i - 1;
262
char c = buf[i];
263
264
l->t = BC_LEX_NAME;
265
266
// Should be obvious. It's looking for valid characters.
267
while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_')
268
{
269
c = buf[++i];
270
}
271
272
// Set the string to the identifier.
273
bc_vec_string(&l->str, i, buf);
274
275
// Increment the index. We minus 1 because it has already been incremented.
276
l->i += i - 1;
277
}
278
279
void
280
bc_lex_init(BcLex* l)
281
{
282
BC_SIG_ASSERT_LOCKED;
283
assert(l != NULL);
284
bc_vec_init(&l->str, sizeof(char), BC_DTOR_NONE);
285
}
286
287
void
288
bc_lex_free(BcLex* l)
289
{
290
BC_SIG_ASSERT_LOCKED;
291
assert(l != NULL);
292
bc_vec_free(&l->str);
293
}
294
295
void
296
bc_lex_file(BcLex* l, const char* file)
297
{
298
assert(l != NULL && file != NULL);
299
l->line = 1;
300
vm->file = file;
301
}
302
303
void
304
bc_lex_next(BcLex* l)
305
{
306
BC_SIG_ASSERT_LOCKED;
307
308
assert(l != NULL);
309
310
l->last = l->t;
311
312
// If this wasn't here, the line number would be off.
313
l->line += (l->i != 0 && l->buf[l->i - 1] == '\n');
314
315
// If the last token was EOF, someone called this one too many times.
316
if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERR_PARSE_EOF);
317
318
l->t = BC_LEX_EOF;
319
320
// We are done if this is true.
321
if (l->i == l->len) return;
322
323
// Loop until failure or we don't have whitespace. This
324
// is so the parser doesn't get inundated with whitespace.
325
do
326
{
327
vm->next(l);
328
}
329
while (l->t == BC_LEX_WHITESPACE);
330
}
331
332
/**
333
* Updates the buffer and len so that they are not invalidated when the stdin
334
* buffer grows.
335
* @param l The lexer.
336
* @param text The text.
337
* @param len The length of the text.
338
*/
339
static void
340
bc_lex_fixText(BcLex* l, const char* text, size_t len)
341
{
342
l->buf = text;
343
l->len = len;
344
}
345
346
bool
347
bc_lex_readLine(BcLex* l)
348
{
349
bool good;
350
351
// These are reversed because they should be already locked, but
352
// bc_vm_readLine() needs them to be unlocked.
353
BC_SIG_UNLOCK;
354
355
// Make sure we read from the appropriate place.
356
switch (l->mode)
357
{
358
case BC_MODE_EXPRS:
359
{
360
good = bc_vm_readBuf(false);
361
break;
362
}
363
364
case BC_MODE_FILE:
365
{
366
good = false;
367
break;
368
}
369
370
#if !BC_ENABLE_OSSFUZZ
371
372
case BC_MODE_STDIN:
373
{
374
good = bc_vm_readLine(false);
375
break;
376
}
377
378
#endif // !BC_ENABLE_OSSFUZZ
379
380
#ifdef __GNUC__
381
#ifndef __clang__
382
default:
383
{
384
// We should never get here.
385
abort();
386
}
387
#endif // __clang__
388
#endif // __GNUC__
389
}
390
391
BC_SIG_LOCK;
392
393
bc_lex_fixText(l, vm->buffer.v, vm->buffer.len - 1);
394
395
return good;
396
}
397
398
void
399
bc_lex_text(BcLex* l, const char* text, BcMode mode)
400
{
401
BC_SIG_ASSERT_LOCKED;
402
403
assert(l != NULL && text != NULL);
404
405
bc_lex_fixText(l, text, strlen(text));
406
l->i = 0;
407
l->t = l->last = BC_LEX_INVALID;
408
l->mode = mode;
409
410
bc_lex_next(l);
411
}
412
413