Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/bc/src/dc_lex.c
39507 views
1
/*
2
* *****************************************************************************
3
*
4
* SPDX-License-Identifier: BSD-2-Clause
5
*
6
* Copyright (c) 2018-2025 Gavin D. Howard and contributors.
7
*
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions are met:
10
*
11
* * Redistributions of source code must retain the above copyright notice, this
12
* list of conditions and the following disclaimer.
13
*
14
* * Redistributions in binary form must reproduce the above copyright notice,
15
* this list of conditions and the following disclaimer in the documentation
16
* and/or other materials provided with the distribution.
17
*
18
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
* POSSIBILITY OF SUCH DAMAGE.
29
*
30
* *****************************************************************************
31
*
32
* The lexer for dc.
33
*
34
*/
35
36
#if DC_ENABLED
37
38
#include <ctype.h>
39
40
#include <dc.h>
41
#include <vm.h>
42
43
bool
44
dc_lex_negCommand(BcLex* l)
45
{
46
char c = l->buf[l->i];
47
return !BC_LEX_NUM_CHAR(c, false, false);
48
}
49
50
/**
51
* Processes a dc command that needs a register. This is where the
52
* extended-register extension is implemented.
53
* @param l The lexer.
54
*/
55
static void
56
dc_lex_register(BcLex* l)
57
{
58
// If extended register is enabled and the character is whitespace...
59
if (DC_X && isspace(l->buf[l->i - 1]))
60
{
61
char c;
62
63
// Eat the whitespace.
64
bc_lex_whitespace(l);
65
c = l->buf[l->i];
66
67
// Check for a letter or underscore.
68
if (BC_ERR(!isalpha(c) && c != '_'))
69
{
70
bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
71
}
72
73
// Parse a normal identifier.
74
l->i += 1;
75
bc_lex_name(l);
76
}
77
else
78
{
79
// I don't allow newlines because newlines are used for controlling when
80
// execution happens, and allowing newlines would just be complex.
81
if (BC_ERR(l->buf[l->i - 1] == '\n'))
82
{
83
bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]);
84
}
85
86
// Set the lexer string and token.
87
bc_vec_popAll(&l->str);
88
bc_vec_pushByte(&l->str, (uchar) l->buf[l->i - 1]);
89
bc_vec_pushByte(&l->str, '\0');
90
l->t = BC_LEX_NAME;
91
}
92
}
93
94
/**
95
* Parses a dc string. Since dc's strings need to check for balanced brackets,
96
* we can't just parse bc and dc strings with different start and end
97
* characters. Oh, and dc strings need to check for escaped brackets.
98
* @param l The lexer.
99
*/
100
static void
101
dc_lex_string(BcLex* l)
102
{
103
size_t depth, nls, i;
104
char c;
105
bool got_more;
106
107
// Set the token and clear the string.
108
l->t = BC_LEX_STR;
109
bc_vec_popAll(&l->str);
110
111
do
112
{
113
depth = 1;
114
nls = 0;
115
got_more = false;
116
117
#if !BC_ENABLE_OSSFUZZ
118
assert(l->mode != BC_MODE_STDIN || l->buf == vm->buffer.v);
119
#endif // !BC_ENABLE_OSSFUZZ
120
121
// This is the meat. As long as we don't run into the NUL byte, and we
122
// have "depth", which means we haven't completely balanced brackets
123
// yet, we continue eating the string.
124
for (i = l->i; (c = l->buf[i]) && depth; ++i)
125
{
126
// Check for escaped brackets and set the depths as appropriate.
127
if (c == '\\')
128
{
129
c = l->buf[++i];
130
if (!c) break;
131
}
132
else
133
{
134
depth += (c == '[');
135
depth -= (c == ']');
136
}
137
138
// We want to adjust the line in the lexer as necessary.
139
nls += (c == '\n');
140
141
if (depth) bc_vec_push(&l->str, &c);
142
}
143
144
if (BC_ERR(c == '\0' && depth))
145
{
146
if (!vm->eof && l->mode != BC_MODE_FILE)
147
{
148
got_more = bc_lex_readLine(l);
149
}
150
151
if (got_more)
152
{
153
bc_vec_popAll(&l->str);
154
}
155
}
156
}
157
while (got_more && depth);
158
159
// Obviously, if we didn't balance, that's an error.
160
if (BC_ERR(c == '\0' && depth))
161
{
162
l->i = i;
163
bc_lex_err(l, BC_ERR_PARSE_STRING);
164
}
165
166
bc_vec_pushByte(&l->str, '\0');
167
168
l->i = i;
169
l->line += nls;
170
}
171
172
/**
173
* Lexes a dc token. This is the dc implementation of BcLexNext.
174
* @param l The lexer.
175
*/
176
void
177
dc_lex_token(BcLex* l)
178
{
179
char c = l->buf[l->i++], c2;
180
size_t i;
181
182
BC_SIG_ASSERT_LOCKED;
183
184
// If the last token was a command that needs a register, we need to parse a
185
// register, so do so.
186
for (i = 0; i < dc_lex_regs_len; ++i)
187
{
188
// If the token is a register token, take care of it and return.
189
if (l->last == dc_lex_regs[i])
190
{
191
dc_lex_register(l);
192
return;
193
}
194
}
195
196
// These lines are for tokens that easily correspond to one character. We
197
// just set the token.
198
if (c >= '"' && c <= '~' &&
199
(l->t = dc_lex_tokens[(c - '"')]) != BC_LEX_INVALID)
200
{
201
return;
202
}
203
204
// This is the workhorse of the lexer when more complicated things are
205
// needed.
206
switch (c)
207
{
208
case '\0':
209
case '\n':
210
case '\t':
211
case '\v':
212
case '\f':
213
case '\r':
214
case ' ':
215
{
216
bc_lex_commonTokens(l, c);
217
break;
218
}
219
220
// We don't have the ! command, so we always expect certain things
221
// after the exclamation point.
222
case '!':
223
{
224
c2 = l->buf[l->i];
225
226
if (c2 == '=') l->t = BC_LEX_OP_REL_NE;
227
else if (c2 == '<') l->t = BC_LEX_OP_REL_LE;
228
else if (c2 == '>') l->t = BC_LEX_OP_REL_GE;
229
else bc_lex_invalidChar(l, c);
230
231
l->i += 1;
232
233
break;
234
}
235
236
case '#':
237
{
238
bc_lex_lineComment(l);
239
break;
240
}
241
242
case '.':
243
{
244
c2 = l->buf[l->i];
245
246
// If the character after is a number, this dot is part of a number.
247
// Otherwise, it's the BSD dot (equivalent to last).
248
if (BC_NO_ERR(BC_LEX_NUM_CHAR(c2, true, false)))
249
{
250
bc_lex_number(l, c);
251
}
252
else bc_lex_invalidChar(l, c);
253
254
break;
255
}
256
257
case '0':
258
case '1':
259
case '2':
260
case '3':
261
case '4':
262
case '5':
263
case '6':
264
case '7':
265
case '8':
266
case '9':
267
case 'A':
268
case 'B':
269
case 'C':
270
case 'D':
271
case 'E':
272
case 'F':
273
{
274
bc_lex_number(l, c);
275
break;
276
}
277
278
case 'g':
279
{
280
c2 = l->buf[l->i];
281
282
if (c2 == 'l') l->t = BC_LEX_KW_LINE_LENGTH;
283
else if (c2 == 'x') l->t = BC_LEX_EXTENDED_REGISTERS;
284
else if (c2 == 'z') l->t = BC_LEX_KW_LEADING_ZERO;
285
else bc_lex_invalidChar(l, c2);
286
287
l->i += 1;
288
289
break;
290
}
291
292
case '[':
293
{
294
dc_lex_string(l);
295
break;
296
}
297
298
default:
299
{
300
bc_lex_invalidChar(l, c);
301
}
302
}
303
}
304
#endif // DC_ENABLED
305
306