CoCalc -- lex.c

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/bc/src/lex.c
39507 views
1
/*
2
 * *****************************************************************************
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 *
6
 * Copyright (c) 2018-2025 Gavin D. Howard and contributors.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions are met:
10
 *
11
 * * Redistributions of source code must retain the above copyright notice, this
12
 *   list of conditions and the following disclaimer.
13
 *
14
 * * Redistributions in binary form must reproduce the above copyright notice,
15
 *   this list of conditions and the following disclaimer in the documentation
16
 *   and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
 * POSSIBILITY OF SUCH DAMAGE.
29
 *
30
 * *****************************************************************************
31
 *
32
 * Common code for the lexers.
33
 *
34
 */
35

36
#include <assert.h>
37
#include <ctype.h>
38
#include <stdbool.h>
39
#include <string.h>
40

41
#include <lex.h>
42
#include <vm.h>
43
#include <bc.h>
44

45
void
46
bc_lex_invalidChar(BcLex* l, char c)
47
{
48
	l->t = BC_LEX_INVALID;
49
	bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
50
}
51

52
void
53
bc_lex_lineComment(BcLex* l)
54
{
55
	l->t = BC_LEX_WHITESPACE;
56
	while (l->i < l->len && l->buf[l->i] != '\n')
57
	{
58
		l->i += 1;
59
	}
60
}
61

62
void
63
bc_lex_comment(BcLex* l)
64
{
65
	size_t i, nlines = 0;
66
	const char* buf;
67
	bool end = false, got_more;
68
	char c;
69

70
	l->i += 1;
71
	l->t = BC_LEX_WHITESPACE;
72

73
	// This loop is complex because it might need to request more data from
74
	// stdin if the comment is not ended. This loop is taken until the comment
75
	// is finished or we have EOF.
76
	do
77
	{
78
		buf = l->buf;
79
		got_more = false;
80

81
		// If we are in stdin mode, the buffer must be the one used for stdin.
82
#if !BC_ENABLE_OSSFUZZ
83
		assert(vm->mode != BC_MODE_STDIN || buf == vm->buffer.v);
84
#endif // !BC_ENABLE_OSSFUZZ
85

86
		// Find the end of the comment.
87
		for (i = l->i; !end; i += !end)
88
		{
89
			// While we don't have an asterisk, eat, but increment nlines.
90
			for (; (c = buf[i]) && c != '*'; ++i)
91
			{
92
				nlines += (c == '\n');
93
			}
94

95
			// If this is true, we need to request more data.
96
			if (BC_ERR(!c || buf[i + 1] == '\0'))
97
			{
98
#if !BC_ENABLE_OSSFUZZ
99
				// Read more, if possible.
100
				if (!vm->eof && l->mode != BC_MODE_FILE)
101
				{
102
					got_more = bc_lex_readLine(l);
103
				}
104
#endif // !BC_ENABLE_OSSFUZZ
105

106
				break;
107
			}
108

109
			// If this turns true, we found the end. Yay!
110
			end = (buf[i + 1] == '/');
111
		}
112
	}
113
	while (got_more && !end);
114

115
	// If we didn't find the end, barf.
116
	if (!end)
117
	{
118
		l->i = i;
119
		bc_lex_err(l, BC_ERR_PARSE_COMMENT);
120
	}
121

122
	l->i = i + 2;
123
	l->line += nlines;
124
}
125

126
void
127
bc_lex_whitespace(BcLex* l)
128
{
129
	char c;
130

131
	l->t = BC_LEX_WHITESPACE;
132

133
	// Eat. We don't eat newlines because they can be special.
134
	for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i])
135
	{
136
		continue;
137
	}
138
}
139

140
void
141
bc_lex_commonTokens(BcLex* l, char c)
142
{
143
	if (!c) l->t = BC_LEX_EOF;
144
	else if (c == '\n') l->t = BC_LEX_NLINE;
145
	else bc_lex_whitespace(l);
146
}
147

148
/**
149
 * Parses a number.
150
 * @param l         The lexer.
151
 * @param start     The start character.
152
 * @param int_only  Whether this function should only look for an integer. This
153
 *                  is used to implement the exponent of scientific notation.
154
 */
155
static size_t
156
bc_lex_num(BcLex* l, char start, bool int_only)
157
{
158
	const char* buf = l->buf + l->i;
159
	size_t i;
160
	char c;
161
	bool last_pt, pt = (start == '.');
162

163
	// This loop looks complex. It is not. It is asking if the character is not
164
	// a nul byte and it if it a valid num character based on what we have found
165
	// thus far, or whether it is a backslash followed by a newline. I can do
166
	// i+1 on the buffer because the buffer must have a nul byte.
167
	for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) ||
168
	                             (c == '\\' && buf[i + 1] == '\n'));
169
	     ++i)
170
	{
171
		// I don't need to test that the next character is a newline because
172
		// the loop condition above ensures that.
173
		if (c == '\\')
174
		{
175
			i += 2;
176

177
			// Make sure to eat whitespace at the beginning of the line.
178
			while (isspace(buf[i]) && buf[i] != '\n')
179
			{
180
				i += 1;
181
			}
182

183
			c = buf[i];
184

185
			// If the next character is not a number character, bail.
186
			if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break;
187
		}
188

189
		// Did we find the radix point?
190
		last_pt = (c == '.');
191

192
		// If we did, and we already have one, then break because it's not part
193
		// of this number.
194
		if (pt && last_pt) break;
195

196
		// Set whether we have found a radix point.
197
		pt = pt || last_pt;
198

199
		bc_vec_push(&l->str, &c);
200
	}
201

202
	return i;
203
}
204

205
void
206
bc_lex_number(BcLex* l, char start)
207
{
208
	l->t = BC_LEX_NUMBER;
209

210
	// Make sure the string is clear.
211
	bc_vec_popAll(&l->str);
212
	bc_vec_push(&l->str, &start);
213

214
	// Parse the number.
215
	l->i += bc_lex_num(l, start, false);
216

217
#if BC_ENABLE_EXTRA_MATH
218
	{
219
		char c = l->buf[l->i];
220

221
		// Do we have a number in scientific notation?
222
		if (c == 'e')
223
		{
224
#if BC_ENABLED
225
			// Barf for POSIX.
226
			if (BC_IS_POSIX) bc_lex_err(l, BC_ERR_POSIX_EXP_NUM);
227
#endif // BC_ENABLED
228

229
			// Push the e.
230
			bc_vec_push(&l->str, &c);
231
			l->i += 1;
232
			c = l->buf[l->i];
233

234
			// Check for negative specifically because bc_lex_num() does not.
235
			if (c == BC_LEX_NEG_CHAR)
236
			{
237
				bc_vec_push(&l->str, &c);
238
				l->i += 1;
239
				c = l->buf[l->i];
240
			}
241

242
			// We must have a number character, so barf if not.
243
			if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true)))
244
			{
245
				bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
246
			}
247

248
			// Parse the exponent.
249
			l->i += bc_lex_num(l, 0, true);
250
		}
251
	}
252
#endif // BC_ENABLE_EXTRA_MATH
253

254
	bc_vec_pushByte(&l->str, '\0');
255
}
256

257
void
258
bc_lex_name(BcLex* l)
259
{
260
	size_t i = 0;
261
	const char* buf = l->buf + l->i - 1;
262
	char c = buf[i];
263

264
	l->t = BC_LEX_NAME;
265

266
	// Should be obvious. It's looking for valid characters.
267
	while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_')
268
	{
269
		c = buf[++i];
270
	}
271

272
	// Set the string to the identifier.
273
	bc_vec_string(&l->str, i, buf);
274

275
	// Increment the index. We minus 1 because it has already been incremented.
276
	l->i += i - 1;
277
}
278

279
void
280
bc_lex_init(BcLex* l)
281
{
282
	BC_SIG_ASSERT_LOCKED;
283
	assert(l != NULL);
284
	bc_vec_init(&l->str, sizeof(char), BC_DTOR_NONE);
285
}
286

287
void
288
bc_lex_free(BcLex* l)
289
{
290
	BC_SIG_ASSERT_LOCKED;
291
	assert(l != NULL);
292
	bc_vec_free(&l->str);
293
}
294

295
void
296
bc_lex_file(BcLex* l, const char* file)
297
{
298
	assert(l != NULL && file != NULL);
299
	l->line = 1;
300
	vm->file = file;
301
}
302

303
void
304
bc_lex_next(BcLex* l)
305
{
306
	BC_SIG_ASSERT_LOCKED;
307

308
	assert(l != NULL);
309

310
	l->last = l->t;
311

312
	// If this wasn't here, the line number would be off.
313
	l->line += (l->i != 0 && l->buf[l->i - 1] == '\n');
314

315
	// If the last token was EOF, someone called this one too many times.
316
	if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERR_PARSE_EOF);
317

318
	l->t = BC_LEX_EOF;
319

320
	// We are done if this is true.
321
	if (l->i == l->len) return;
322

323
	// Loop until failure or we don't have whitespace. This
324
	// is so the parser doesn't get inundated with whitespace.
325
	do
326
	{
327
		vm->next(l);
328
	}
329
	while (l->t == BC_LEX_WHITESPACE);
330
}
331

332
/**
333
 * Updates the buffer and len so that they are not invalidated when the stdin
334
 * buffer grows.
335
 * @param l     The lexer.
336
 * @param text  The text.
337
 * @param len   The length of the text.
338
 */
339
static void
340
bc_lex_fixText(BcLex* l, const char* text, size_t len)
341
{
342
	l->buf = text;
343
	l->len = len;
344
}
345

346
bool
347
bc_lex_readLine(BcLex* l)
348
{
349
	bool good;
350

351
	// These are reversed because they should be already locked, but
352
	// bc_vm_readLine() needs them to be unlocked.
353
	BC_SIG_UNLOCK;
354

355
	// Make sure we read from the appropriate place.
356
	switch (l->mode)
357
	{
358
		case BC_MODE_EXPRS:
359
		{
360
			good = bc_vm_readBuf(false);
361
			break;
362
		}
363

364
		case BC_MODE_FILE:
365
		{
366
			good = false;
367
			break;
368
		}
369

370
#if !BC_ENABLE_OSSFUZZ
371

372
		case BC_MODE_STDIN:
373
		{
374
			good = bc_vm_readLine(false);
375
			break;
376
		}
377

378
#endif // !BC_ENABLE_OSSFUZZ
379

380
#ifdef __GNUC__
381
#ifndef __clang__
382
		default:
383
		{
384
			// We should never get here.
385
			abort();
386
		}
387
#endif // __clang__
388
#endif // __GNUC__
389
	}
390

391
	BC_SIG_LOCK;
392

393
	bc_lex_fixText(l, vm->buffer.v, vm->buffer.len - 1);
394

395
	return good;
396
}
397

398
void
399
bc_lex_text(BcLex* l, const char* text, BcMode mode)
400
{
401
	BC_SIG_ASSERT_LOCKED;
402

403
	assert(l != NULL && text != NULL);
404

405
	bc_lex_fixText(l, text, strlen(text));
406
	l->i = 0;
407
	l->t = l->last = BC_LEX_INVALID;
408
	l->mode = mode;
409

410
	bc_lex_next(l);
411
}
412

413
Product

Resources

Company