CoCalc -- dc

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/bc/src/dc_lex.c
39507 views
1
/*
2
 * *****************************************************************************
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 *
6
 * Copyright (c) 2018-2025 Gavin D. Howard and contributors.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions are met:
10
 *
11
 * * Redistributions of source code must retain the above copyright notice, this
12
 *   list of conditions and the following disclaimer.
13
 *
14
 * * Redistributions in binary form must reproduce the above copyright notice,
15
 *   this list of conditions and the following disclaimer in the documentation
16
 *   and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
 * POSSIBILITY OF SUCH DAMAGE.
29
 *
30
 * *****************************************************************************
31
 *
32
 * The lexer for dc.
33
 *
34
 */
35

36
#if DC_ENABLED
37

38
#include <ctype.h>
39

40
#include <dc.h>
41
#include <vm.h>
42

43
bool
44
dc_lex_negCommand(BcLex* l)
45
{
46
	char c = l->buf[l->i];
47
	return !BC_LEX_NUM_CHAR(c, false, false);
48
}
49

50
/**
51
 * Processes a dc command that needs a register. This is where the
52
 * extended-register extension is implemented.
53
 * @param l  The lexer.
54
 */
55
static void
56
dc_lex_register(BcLex* l)
57
{
58
	// If extended register is enabled and the character is whitespace...
59
	if (DC_X && isspace(l->buf[l->i - 1]))
60
	{
61
		char c;
62

63
		// Eat the whitespace.
64
		bc_lex_whitespace(l);
65
		c = l->buf[l->i];
66

67
		// Check for a letter or underscore.
68
		if (BC_ERR(!isalpha(c) && c != '_'))
69
		{
70
			bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
71
		}
72

73
		// Parse a normal identifier.
74
		l->i += 1;
75
		bc_lex_name(l);
76
	}
77
	else
78
	{
79
		// I don't allow newlines because newlines are used for controlling when
80
		// execution happens, and allowing newlines would just be complex.
81
		if (BC_ERR(l->buf[l->i - 1] == '\n'))
82
		{
83
			bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]);
84
		}
85

86
		// Set the lexer string and token.
87
		bc_vec_popAll(&l->str);
88
		bc_vec_pushByte(&l->str, (uchar) l->buf[l->i - 1]);
89
		bc_vec_pushByte(&l->str, '\0');
90
		l->t = BC_LEX_NAME;
91
	}
92
}
93

94
/**
95
 * Parses a dc string. Since dc's strings need to check for balanced brackets,
96
 * we can't just parse bc and dc strings with different start and end
97
 * characters. Oh, and dc strings need to check for escaped brackets.
98
 * @param l  The lexer.
99
 */
100
static void
101
dc_lex_string(BcLex* l)
102
{
103
	size_t depth, nls, i;
104
	char c;
105
	bool got_more;
106

107
	// Set the token and clear the string.
108
	l->t = BC_LEX_STR;
109
	bc_vec_popAll(&l->str);
110

111
	do
112
	{
113
		depth = 1;
114
		nls = 0;
115
		got_more = false;
116

117
#if !BC_ENABLE_OSSFUZZ
118
		assert(l->mode != BC_MODE_STDIN || l->buf == vm->buffer.v);
119
#endif // !BC_ENABLE_OSSFUZZ
120

121
		// This is the meat. As long as we don't run into the NUL byte, and we
122
		// have "depth", which means we haven't completely balanced brackets
123
		// yet, we continue eating the string.
124
		for (i = l->i; (c = l->buf[i]) && depth; ++i)
125
		{
126
			// Check for escaped brackets and set the depths as appropriate.
127
			if (c == '\\')
128
			{
129
				c = l->buf[++i];
130
				if (!c) break;
131
			}
132
			else
133
			{
134
				depth += (c == '[');
135
				depth -= (c == ']');
136
			}
137

138
			// We want to adjust the line in the lexer as necessary.
139
			nls += (c == '\n');
140

141
			if (depth) bc_vec_push(&l->str, &c);
142
		}
143

144
		if (BC_ERR(c == '\0' && depth))
145
		{
146
			if (!vm->eof && l->mode != BC_MODE_FILE)
147
			{
148
				got_more = bc_lex_readLine(l);
149
			}
150

151
			if (got_more)
152
			{
153
				bc_vec_popAll(&l->str);
154
			}
155
		}
156
	}
157
	while (got_more && depth);
158

159
	// Obviously, if we didn't balance, that's an error.
160
	if (BC_ERR(c == '\0' && depth))
161
	{
162
		l->i = i;
163
		bc_lex_err(l, BC_ERR_PARSE_STRING);
164
	}
165

166
	bc_vec_pushByte(&l->str, '\0');
167

168
	l->i = i;
169
	l->line += nls;
170
}
171

172
/**
173
 * Lexes a dc token. This is the dc implementation of BcLexNext.
174
 * @param l  The lexer.
175
 */
176
void
177
dc_lex_token(BcLex* l)
178
{
179
	char c = l->buf[l->i++], c2;
180
	size_t i;
181

182
	BC_SIG_ASSERT_LOCKED;
183

184
	// If the last token was a command that needs a register, we need to parse a
185
	// register, so do so.
186
	for (i = 0; i < dc_lex_regs_len; ++i)
187
	{
188
		// If the token is a register token, take care of it and return.
189
		if (l->last == dc_lex_regs[i])
190
		{
191
			dc_lex_register(l);
192
			return;
193
		}
194
	}
195

196
	// These lines are for tokens that easily correspond to one character. We
197
	// just set the token.
198
	if (c >= '"' && c <= '~' &&
199
	    (l->t = dc_lex_tokens[(c - '"')]) != BC_LEX_INVALID)
200
	{
201
		return;
202
	}
203

204
	// This is the workhorse of the lexer when more complicated things are
205
	// needed.
206
	switch (c)
207
	{
208
		case '\0':
209
		case '\n':
210
		case '\t':
211
		case '\v':
212
		case '\f':
213
		case '\r':
214
		case ' ':
215
		{
216
			bc_lex_commonTokens(l, c);
217
			break;
218
		}
219

220
		// We don't have the ! command, so we always expect certain things
221
		// after the exclamation point.
222
		case '!':
223
		{
224
			c2 = l->buf[l->i];
225

226
			if (c2 == '=') l->t = BC_LEX_OP_REL_NE;
227
			else if (c2 == '<') l->t = BC_LEX_OP_REL_LE;
228
			else if (c2 == '>') l->t = BC_LEX_OP_REL_GE;
229
			else bc_lex_invalidChar(l, c);
230

231
			l->i += 1;
232

233
			break;
234
		}
235

236
		case '#':
237
		{
238
			bc_lex_lineComment(l);
239
			break;
240
		}
241

242
		case '.':
243
		{
244
			c2 = l->buf[l->i];
245

246
			// If the character after is a number, this dot is part of a number.
247
			// Otherwise, it's the BSD dot (equivalent to last).
248
			if (BC_NO_ERR(BC_LEX_NUM_CHAR(c2, true, false)))
249
			{
250
				bc_lex_number(l, c);
251
			}
252
			else bc_lex_invalidChar(l, c);
253

254
			break;
255
		}
256

257
		case '0':
258
		case '1':
259
		case '2':
260
		case '3':
261
		case '4':
262
		case '5':
263
		case '6':
264
		case '7':
265
		case '8':
266
		case '9':
267
		case 'A':
268
		case 'B':
269
		case 'C':
270
		case 'D':
271
		case 'E':
272
		case 'F':
273
		{
274
			bc_lex_number(l, c);
275
			break;
276
		}
277

278
		case 'g':
279
		{
280
			c2 = l->buf[l->i];
281

282
			if (c2 == 'l') l->t = BC_LEX_KW_LINE_LENGTH;
283
			else if (c2 == 'x') l->t = BC_LEX_EXTENDED_REGISTERS;
284
			else if (c2 == 'z') l->t = BC_LEX_KW_LEADING_ZERO;
285
			else bc_lex_invalidChar(l, c2);
286

287
			l->i += 1;
288

289
			break;
290
		}
291

292
		case '[':
293
		{
294
			dc_lex_string(l);
295
			break;
296
		}
297

298
		default:
299
		{
300
			bc_lex_invalidChar(l, c);
301
		}
302
	}
303
}
304
#endif // DC_ENABLED
305

306
Product

Resources

Company