CoCalc -- gravity

GitHub Repository: epidemian/gravity
Path: blob/master/src/compiler/gravity_lexer.c
1214 views
1
//
2
//  gravity_lexer.c
3
//  gravity
4
//
5
//  Created by Marco Bambini on 30/08/14.
6
//  Copyright (c) 2014 CreoLabs. All rights reserved.
7
//
8

9
// ASCII Table: http://www.theasciicode.com.ar
10

11
#include "gravity_lexer.h"
12
#include "gravity_token.h"
13
#include "gravity_utils.h"
14

15
struct gravity_lexer_t {
16
	const char					*buffer;		// buffer
17
	uint32_t					offset;			// current buffer offset (in bytes)
18
	uint32_t					position;		// current buffer position (in characters)
19
	uint32_t					length;			// buffer length (in bytes)
20
	uint32_t					lineno;			// line counter
21
	uint32_t					colno;			// column counter
22
	uint32_t					fileid;			// current file id
23
	
24
	gtoken_s					token;			// current token
25
	bool						peeking;		// flag to check if a peek operation is in progress
26
	bool						is_static;		// flag to check if buffer is static and must not be freed
27
	gravity_delegate_t			*delegate;		// delegate if any
28
};
29

30
// Radix of the numeric literal being scanned.
// NUMBER_INTEGER must remain the first enumerator (value 0) so that a
// zero/false argument to is_digit() keeps selecting plain decimal digits.
typedef enum {
	NUMBER_INTEGER,		// decimal digits (also used for floats and exponents)
	NUMBER_HEX,			// literal introduced by 0x / 0X
	NUMBER_BIN,			// literal introduced by 0b / 0B
	NUMBER_OCT			// literal introduced by 0o / 0O
} gravity_number_type;
36

37

38
// LEXER macros
39
// Every macro below expects a variable named `lexer` (gravity_lexer_t *) in scope.
// Multi-step macros are single expressions (or do/while(0) blocks) so that each
// one behaves as exactly ONE statement, even as the unbraced body of an if/else.
#define INC_COL					(++lexer->colno)
#define DEC_COL					(--lexer->colno)
#define RESET_COL				(lexer->colno = 1)
#define INC_POSITION			(++lexer->position)
#define DEC_POSITION			(--lexer->position)
#define INC_OFFSET				(++lexer->offset, INC_COL)
#define DEC_OFFSET				(--lexer->offset, DEC_COL)
#define INC_OFFSET_POSITION		(INC_OFFSET, INC_POSITION)
#define DEC_OFFSET_POSITION		(DEC_OFFSET, DEC_POSITION)
#define INC_LINE				(++lexer->lineno, RESET_COL)
#define IS_EOF					(lexer->offset >= lexer->length)

// NEXT evaluates to the byte at the cursor and advances offset, position and column.
#define NEXT					(INC_POSITION, INC_COL, lexer->buffer[lexer->offset++])

// PEEK_* never read at or beyond buffer[length]; out-of-range peeks evaluate to 0.
#define PEEK_CURRENT			((lexer->offset < lexer->length) ? lexer->buffer[lexer->offset] : 0)
#define PEEK_NEXT				((lexer->offset + 1 < lexer->length) ? lexer->buffer[lexer->offset + 1] : 0)
#define PEEK_NEXT2				((lexer->offset + 2 < lexer->length) ? lexer->buffer[lexer->offset + 2] : 0)

// TOKEN macros
// TOKEN_RESET starts a new token at the current cursor location.
#define TOKEN_RESET				do {															\
									lexer->token = NO_TOKEN;									\
									lexer->token.position = lexer->position;					\
									lexer->token.value = lexer->buffer + lexer->offset;			\
									lexer->token.lineno = lexer->lineno;						\
									lexer->token.colno = lexer->colno;							\
								} while (0)
#define TOKEN_FINALIZE(t)		do { lexer->token.type = (t); lexer->token.fileid = lexer->fileid; } while (0)
#define INC_TOKBYTES			(++lexer->token.bytes)
#define INC_TOKUTF8LEN			(++lexer->token.length)
#define INC_TOKLEN				(INC_TOKBYTES, INC_TOKUTF8LEN)
#define DEC_TOKLEN				(--lexer->token.bytes, --lexer->token.length)
#define SET_TOKESCAPED(flag)	(lexer->token.escaped = (flag))
#define SET_TOKTYPE(t)			(lexer->token.type = (t))

// Forwards the current token to the delegate's parser callback, except while peeking.
#define LEXER_CALL_CALLBACK()	do {																									\
									if ((lexer->peeking == false) && (lexer->delegate) && (lexer->delegate->parser_callback)) {		\
										lexer->delegate->parser_callback(&lexer->token, lexer->delegate->xdata);						\
									}																									\
								} while (0)
68

69
// MARK: -
70

71
// True for the horizontal blanks skipped between tokens: space, tab,
// vertical tab and form feed. Line terminators are NOT included.
static inline bool is_whitespace (int c) {
	switch (c) {
		case ' ':
		case '\t':
		case '\v':
		case '\f':
			return true;
		default:
			return false;
	}
}
74

75
// Reports whether the byte `c` located at the cursor starts a line terminator.
// Recognised terminators:
//   LF     U+000A  (0A)
//   CR     U+000D  (0D)
//   CR+LF          (0D 0A)
//   NEL    U+0085  (C2 85)
//   LS     U+2028  (E2 80 A8)
// See https://en.wikipedia.org/wiki/Newline#Unicode (other terminators are not handled).
//
// Side effect: for a multi-byte terminator every byte EXCEPT the last one is
// consumed with NEXT (offset, position and column advance; the token length
// does not). The caller is expected to consume one more byte afterwards.
// The look-ahead uses PEEK_NEXT/PEEK_NEXT2, so `c` must be the not yet
// consumed byte at lexer->offset.
//
// Bytes are compared as unsigned values: `c` and the peeked bytes originate
// from a plain `char` buffer, and where char is signed the values
// 0xC2/0x85/0xE2/0x80/0xA8 would be negative and could never match.
static inline bool is_newline (gravity_lexer_t *lexer, int c) {
	const int b0 = c & 0xFF;

	// LF
	if (b0 == 0x0A) return true;

	// CR+LF or CR
	if (b0 == 0x0D) {
		if ((PEEK_NEXT & 0xFF) == 0x0A) {NEXT;}
		return true;
	}

	// NEL: Next Line
	if ((b0 == 0xC2) && ((PEEK_NEXT & 0xFF) == 0x85)) {
		NEXT;
		return true;
	}

	// LS: Line Separator
	if ((b0 == 0xE2) && ((PEEK_NEXT & 0xFF) == 0x80) && ((PEEK_NEXT2 & 0xFF) == 0xA8)) {
		NEXT; NEXT;
		return true;
	}

	// and probably more not handled here
	return false;
}
106

107
// True when the two bytes c1,c2 open a comment: "//" (line) or "/*" (block).
static inline bool is_comment (int c1, int c2) {
	if (c1 != '/') return false;
	return (c2 == '*') || (c2 == '/');
}
110

111
// True when c is the statement separator ';'.
static inline bool is_semicolon (int c) {
	return c == ';';
}
114

115
// True when c can start an identifier: a letter (per isalpha) or '_'.
// c is narrowed to unsigned char before reaching <ctype.h>: bytes read from a
// plain char buffer may be negative, and passing such a value to isalpha()
// is undefined behaviour.
static inline bool is_alpha (int c) {
	if (c == '_') return true;
	return isalpha((unsigned char)c) != 0;
}
119

120
// True when c is a valid digit for a literal of kind `ntype`:
//   NUMBER_BIN      0-1
//   NUMBER_OCT      0-7
//   NUMBER_HEX      0-9, a-f, A-F
//   NUMBER_INTEGER  0-9
// c is narrowed to unsigned char before reaching <ctype.h>, because negative
// values (bytes >= 0x80 read through a signed char) are undefined there.
static inline bool is_digit (int c, gravity_number_type ntype) {
	if (ntype == NUMBER_BIN) return (c == '0') || (c == '1');
	if (ntype == NUMBER_OCT) return (c >= '0') && (c <= '7');

	const unsigned char uc = (unsigned char)c;
	if (ntype == NUMBER_HEX) {
		const int upper = toupper(uc);
		if ((upper >= 'A') && (upper <= 'F')) return true;
	}
	return isdigit(uc) != 0;
}
126

127
// True when c opens a string literal: double or single quote.
static inline bool is_string (int c) {
	return (c == '"') || (c == '\'');
}
130

131
// True when c is '@', the character scanned as a TOK_SPECIAL token.
static inline bool is_special (int c) {
	return c == '@';
}
134

135
// True when c is a character that can start (or be) a built-in operator:
//   PARENTHESIS   { } [ ] ( )
//   PUNCTUATION   . ; : ? ,
//   OPERATORS     + - * / < > ! = | & ^ % ~
static inline bool is_builtin_operator (int c) {
	switch (c) {
		case '+': case '-': case '*': case '/':
		case '<': case '>': case '!': case '=':
		case '|': case '&': case '^': case '%':
		case '~': case '.': case ';': case ':':
		case '?': case ',': case '{': case '}':
		case '[': case ']': case '(': case ')':
			return true;
		default:
			return false;
	}
}
150

151
// True when c is '#', the character scanned as a TOK_MACRO token.
static inline bool is_preprocessor (int c) {
	return c == '#';
}
154

155
// True when c may continue an identifier: letter, digit or '_'.
// The caller has already checked that the first character is alphabetic.
// c is narrowed to unsigned char before reaching <ctype.h>; negative values
// (bytes >= 0x80 read through a signed char) are undefined there.
static inline bool is_identifier (int c) {
	if (c == '_') return true;
	const unsigned char uc = (unsigned char)c;
	return (isalpha(uc) != 0) || (isdigit(uc) != 0);
}
159

160
// MARK: -
161

162
// Turns the current token into a TOK_ERROR token whose value is `message`.
// Unless the cursor is already at end of input, one byte is consumed first.
// token.value points at `message` itself (no copy is made), so the string must
// outlive the token; every caller in this file passes a string literal.
// token.bytes is set to strlen(message).
// Always returns TOK_ERROR.
static gtoken_t lexer_error(gravity_lexer_t *lexer, const char *message) {
	if (!IS_EOF) {
		INC_TOKLEN;
		INC_OFFSET_POSITION;
	}
	TOKEN_FINALIZE(TOK_ERROR);

	lexer->token.value = (char *)message;
	lexer->token.bytes = (uint32_t)strlen(message);
	return TOK_ERROR;
}
173

174
// Consumes one UTF-8 encoded character and adds it to the current token.
// Returns the first (lead) byte of the character, or 0 when utf8_charbytes
// reports a length of 0 (in that case lexer_error has been invoked).
//
// Accounting per sequence length:
//   1..3 bytes: token.length and position advance by one, token.bytes and
//               offset by the number of bytes
//   4 bytes:    as above, but token.length and position advance by two
//               NOTE(review): presumably mirrors a UTF-16 surrogate pair — confirm
//
// NOTE(review): continuation bytes are skipped without validation and without
// checking the buffer bounds, so a sequence truncated by end of input leaves
// offset beyond length (IS_EOF still holds).
static inline int next_utf8(gravity_lexer_t *lexer) {
	int c = NEXT;
	INC_TOKLEN;

	// Pass a real char to utf8_charbytes: taking the address of the int would
	// expose its first byte in memory, which is the lead byte only on
	// little-endian machines.
	const char lead = (char)c;
	uint32_t len = utf8_charbytes(&lead, 0);
	if (len == 1) return c;

	switch(len) {
		case 0: lexer_error(lexer, "Unknown character inside a string literal"); return 0;
		case 2: INC_OFFSET; INC_TOKBYTES; break;
		case 3: INC_OFFSET; INC_OFFSET; INC_TOKBYTES; INC_TOKBYTES; break;
		case 4: INC_OFFSET; INC_OFFSET; INC_OFFSET; INC_TOKBYTES; INC_TOKBYTES; INC_TOKBYTES; INC_POSITION; INC_TOKUTF8LEN; break;
	}

	return c;
}
190

191
// Scans a comment starting at the cursor and returns TOK_COMMENT.
// The cursor must be on the '/' of "//" (line comment) or "/*" (block comment).
// A line comment ends with (and includes) the first line terminator; block
// comments nest and end when every "/*" has been matched by a "*/".
// Reaching end of input also ends the comment; no error is reported.
//
// Line terminators are detected BEFORE their first byte is consumed, because
// is_newline() looks ahead from the cursor. This way CR+LF is counted as a
// single line and every consumed byte is included in the token.
//
// NOTE(review): the delegate callback below is invoked even while peeking, so a
// comment skipped during gravity_lexer_peek is reported again by the following
// gravity_lexer_next — confirm this is intended.
static gtoken_t lexer_scan_comment(gravity_lexer_t *lexer) {
	bool isLineComment = (PEEK_NEXT == '/');

	TOKEN_RESET;
	INC_OFFSET_POSITION;
	INC_OFFSET_POSITION;

	// because I already scanned /* or //
	lexer->token.bytes = lexer->token.length = 2;

	// count necessary only to support nested comments
	int count = 1;
	while (!IS_EOF) {
		int c = PEEK_CURRENT;

		const uint32_t off0 = lexer->offset;
		const uint32_t pos0 = lexer->position;
		if (is_newline(lexer, c)) {
			// is_newline leaves the last byte of the terminator unconsumed
			INC_OFFSET;
			const uint32_t nbytes = lexer->offset - off0;
			const uint32_t nchars = (c & 0x80) ? 1 : nbytes;	// CR+LF is two characters, NEL/LS are one
			lexer->position = pos0 + nchars;
			lexer->token.bytes += nbytes;
			lexer->token.length += nchars;
			INC_LINE;
			if (isLineComment) break;
			continue;
		}

		c = next_utf8(lexer);
		if (isLineComment) continue;

		// block comment: track nesting depth
		const int c2 = IS_EOF ? 0 : PEEK_CURRENT;
		if ((c == '/') && (c2 == '*')) ++count;
		if ((c == '*') && (c2 == '/')) {--count; NEXT; INC_TOKLEN; if (count == 0) break;}
	}

	// comment is from buffer->[nseek] and it is nlen length
	TOKEN_FINALIZE(TOK_COMMENT);

	// comments callback is called directly from the scan function and not from the main scan loop
	if ((lexer->delegate) && (lexer->delegate->parser_callback)) {
		lexer->delegate->parser_callback(&lexer->token, lexer->delegate->xdata);
	}

	DEBUG_LEXEM("Found comment");
	return TOK_COMMENT;
}
227

228
// Consumes the single ';' at the cursor and returns TOK_OP_SEMICOLON.
static gtoken_t lexer_scan_semicolon(gravity_lexer_t *lexer) {
	TOKEN_RESET;
	INC_TOKLEN;
	INC_OFFSET_POSITION;
	TOKEN_FINALIZE(TOK_OP_SEMICOLON);

	return TOK_OP_SEMICOLON;
}
236

237
// Scans an identifier or keyword starting at the cursor.
// Consumes the longest run of identifier characters (see is_identifier), then
// asks token_keyword() to classify the text; its result becomes both the token
// type and the return value.
static gtoken_t lexer_scan_identifier(gravity_lexer_t *lexer) {
	TOKEN_RESET;
	// stop at end of input: the buffer is not required to be NUL terminated
	while (!IS_EOF && is_identifier(PEEK_CURRENT)) {
		INC_OFFSET_POSITION;
		INC_TOKLEN;
	}

	TOKEN_FINALIZE(TOK_IDENTIFIER);

	// first check if it is a reserved word, otherwise reports it as an identifier
	gtoken_t type = token_keyword(lexer->token.value, lexer->token.bytes);
	SET_TOKTYPE(type);

	#if GRAVITY_LEXEM_DEBUG
	if (type == TOK_IDENTIFIER) DEBUG_LEXEM("Found identifier: %.*s", TOKEN_BYTES(lexer->token), TOKEN_VALUE(lexer->token));
	else DEBUG_LEXEM("Found keyword: %s", token_name(type));
	#endif

	return type;
}
257

258
// Scans a numeric literal starting at the cursor.
// Returns TOK_NUMBER, or TOK_ERROR ("Malformed number expression.") when a
// character that can neither continue nor terminate a number is found.
//
// Supported formats:
//   12345      decimal
//   3.1415     float
//   1.25e2     scientific notation (1.25 * 10^2 = 125.0)
//   1.25e-2    scientific notation (1.25 * 10^-2 = 0.0125)
//   0xFFFF     hex
//   0B0101     binary
//   0O7777     octal
//
// A number ends at end of input, whitespace, a line terminator, an operator or
// a semicolon. A '.' not followed by a digit also ends it (so "1..5" is a range).
// A sign is accepted only immediately after the exponent marker: "1e-5" is one
// number, while "1e5-3" is the number 1e5 followed by '-' and 3.
static gtoken_t lexer_scan_number(gravity_lexer_t *lexer) {
	bool		floatAllowed = true;
	bool		expAllowed = true;
	bool		signAllowed = false;
	bool		dotFound = false;
	bool		expFound = false;
	int			c, expChar = 'e', floatChar = '.';
	int			plusSign = '+', minusSign = '-';

	// a leading 0 followed by x/b/o selects a radix; those forms are integer only
	gravity_number_type	ntype = NUMBER_INTEGER;
	if (PEEK_CURRENT == '0') {
		const int marker = toupper((unsigned char)PEEK_NEXT);
		if (marker == 'X') ntype = NUMBER_HEX;
		else if (marker == 'B') ntype = NUMBER_BIN;
		else if (marker == 'O') ntype = NUMBER_OCT;
		if (ntype != NUMBER_INTEGER) {floatAllowed = false; expAllowed = false;}
	}

	TOKEN_RESET;
	if (ntype != NUMBER_INTEGER) {
		// skip first 0* number marker
		INC_TOKLEN;
		INC_TOKLEN;
		INC_OFFSET_POSITION;
		INC_OFFSET_POSITION;
	}

loop:
	// check for end of input before touching the buffer
	if (IS_EOF) goto report_token;
	c = PEEK_CURRENT;

	// explicitly list all accepted cases
	if (is_digit(c, ntype)) {signAllowed = false; goto accept_char;}
	if (is_whitespace(c)) goto report_token;
	if (is_newline(lexer, c)) goto report_token;

	if (expAllowed) {
		if ((c == expChar) && (!expFound)) {expFound = true; signAllowed = true; goto accept_char;}
	}
	if (floatAllowed) {
		if ((c == floatChar) && (!is_digit(PEEK_NEXT, ntype))) goto report_token;
		if ((c == floatChar) && (!dotFound))  {dotFound = true; goto accept_char;}
	}
	if (signAllowed) {
		if ((c == plusSign) || (c == minusSign)) {signAllowed = false; goto accept_char;}
	}
	if (is_builtin_operator(c)) goto report_token;
	if (is_semicolon(c)) goto report_token;

	// any other case is an error
	goto report_error;

accept_char:
	INC_TOKLEN;
	INC_OFFSET_POSITION;
	goto loop;

report_token:
	// number is from buffer->[nseek] and it is bytes length
	TOKEN_FINALIZE(TOK_NUMBER);

	DEBUG_LEXEM("Found number: %.*s", TOKEN_BYTES(lexer->token), TOKEN_VALUE(lexer->token));
	return TOK_NUMBER;

report_error:
	return lexer_error(lexer, "Malformed number expression.");
}
336

337
// Scans a string literal starting at the cursor (which must be on the opening
// quote) and returns TOK_STRING. The token covers the text BETWEEN the quotes
// and references the source buffer directly (no memory allocation, escape
// sequences are not decoded here; token.escaped records that one was seen).
// Returns TOK_ERROR ("Unexpected EOF inside a string literal") when the closing
// quote is missing.
//
// Line terminators inside the literal advance the line counter. Because
// is_newline() consumes part of a multi-byte terminator itself, the bytes it
// skipped are added to the token here so the token length stays exact.
//
// NOTE(review): the value returned by next_utf8 is ignored, so an invalid byte
// inside the literal does not stop the scan.
static gtoken_t lexer_scan_string(gravity_lexer_t *lexer) {
	// no memory allocation here
	int quote = NEXT;			// opening delimiter (" or '); the same character closes the literal
	TOKEN_RESET;				// token starts right after the opening delimiter
	SET_TOKESCAPED(false);		// set escaped flag to false

	for (;;) {
		// check for end of input before touching the buffer
		if (IS_EOF) {return lexer_error(lexer, "Unexpected EOF inside a string literal");}

		const int c2 = (unsigned char)PEEK_CURRENT;
		if (c2 == quote) break;

		// handle escaped characters: skip the backslash and the byte after it
		if (c2 == '\\') {
			SET_TOKESCAPED(true);
			INC_OFFSET_POSITION;
			INC_TOKLEN;
			if (!IS_EOF) {
				INC_OFFSET_POSITION;
				INC_TOKLEN;
			}
			continue;
		}

		const uint32_t off0 = lexer->offset;
		const uint32_t pos0 = lexer->position;
		if (is_newline(lexer, c2)) {
			// is_newline leaves the last byte of the terminator unconsumed
			INC_OFFSET;
			const uint32_t nbytes = lexer->offset - off0;
			const uint32_t nchars = (c2 & 0x80) ? 1 : nbytes;	// CR+LF is two characters, NEL/LS are one
			lexer->position = pos0 + nchars;
			lexer->token.bytes += nbytes;
			lexer->token.length += nchars;
			INC_LINE;
			continue;
		}

		// scan next
		next_utf8(lexer);
	}

	// skip the closing quote (not part of the token)
	INC_OFFSET_POSITION;

	// string is from buffer->[nseek] and it is nlen length
	TOKEN_FINALIZE(TOK_STRING);

	DEBUG_LEXEM("Found string: %.*s", TOKEN_BYTES(lexer->token), TOKEN_VALUE(lexer->token));
	return TOK_STRING;
}
372

373
// Helper for lexer_scan_operator: when the byte at the cursor equals `expected`
// it is consumed and added to the current token. Returns whether it matched.
// Never reads the buffer at end of input.
static inline bool lexer_operator_accept(gravity_lexer_t *lexer, int expected) {
	if (IS_EOF) return false;
	if (PEEK_CURRENT != expected) return false;
	INC_OFFSET_POSITION;
	INC_TOKLEN;
	return true;
}

// Scans an operator or punctuation token starting at the cursor, always
// preferring the longest match (e.g. "<<=" over "<<" over "<").
// Returns the operator's token type, or TOK_ERROR for an unknown character or
// an incomplete range operator (".." not followed by '<' or '.').
// A '.' directly followed by a digit is handed to lexer_scan_number (".5").
static gtoken_t lexer_scan_operator(gravity_lexer_t *lexer) {
	TOKEN_RESET;
	INC_TOKLEN;

	int c = NEXT;
	int tok = 0;

	switch (c) {
		case '=':
			if (!lexer_operator_accept(lexer, '=')) tok = TOK_OP_ASSIGN;
			else tok = lexer_operator_accept(lexer, '=') ? TOK_OP_ISIDENTICAL : TOK_OP_ISEQUAL;
			break;
		case '+':
			tok = lexer_operator_accept(lexer, '=') ? TOK_OP_ADD_ASSIGN : TOK_OP_ADD;
			break;
		case '-':
			tok = lexer_operator_accept(lexer, '=') ? TOK_OP_SUB_ASSIGN : TOK_OP_SUB;
			break;
		case '*':
			tok = lexer_operator_accept(lexer, '=') ? TOK_OP_MUL_ASSIGN : TOK_OP_MUL;
			break;
		case '/':
			tok = lexer_operator_accept(lexer, '=') ? TOK_OP_DIV_ASSIGN : TOK_OP_DIV;
			break;
		case '%':
			tok = lexer_operator_accept(lexer, '=') ? TOK_OP_REM_ASSIGN : TOK_OP_REM;
			break;
		case '<':
			if (lexer_operator_accept(lexer, '=')) tok = TOK_OP_LESS_EQUAL;
			else if (lexer_operator_accept(lexer, '<')) {
				tok = lexer_operator_accept(lexer, '=') ? TOK_OP_SHIFT_LEFT_ASSIGN : TOK_OP_SHIFT_LEFT;
			}
			else tok = TOK_OP_LESS;
			break;
		case '>':
			if (lexer_operator_accept(lexer, '=')) tok = TOK_OP_GREATER_EQUAL;
			else if (lexer_operator_accept(lexer, '>')) {
				tok = lexer_operator_accept(lexer, '=') ? TOK_OP_SHIFT_RIGHT_ASSIGN : TOK_OP_SHIFT_RIGHT;
			}
			else tok = TOK_OP_GREATER;
			break;
		case '&':
			if (lexer_operator_accept(lexer, '&')) tok = TOK_OP_AND;
			else if (lexer_operator_accept(lexer, '=')) tok = TOK_OP_BIT_AND_ASSIGN;
			else tok = TOK_OP_BIT_AND;
			break;
		case '|':
			if (lexer_operator_accept(lexer, '|')) tok = TOK_OP_OR;
			else if (lexer_operator_accept(lexer, '=')) tok = TOK_OP_BIT_OR_ASSIGN;
			else tok = TOK_OP_BIT_OR;
			break;
		case '.': // check for special .digit case
			if (!IS_EOF && is_digit(PEEK_CURRENT, NUMBER_INTEGER)) {
				// give the '.' back and let the number scanner handle ".5"
				DEC_OFFSET_POSITION; DEC_TOKLEN; tok = lexer_scan_number(lexer);
			}
			else if (lexer_operator_accept(lexer, '.')) {
				// seems a range, now decide range type from the third character
				if (lexer_operator_accept(lexer, '<')) tok = TOK_OP_RANGE_EXCLUDED;
				else if (lexer_operator_accept(lexer, '.')) tok = TOK_OP_RANGE_INCLUDED;
				else return lexer_error(lexer, "Unrecognized Range operator");
			}
			else tok = TOK_OP_DOT;
			break;
		case ',':
			tok = TOK_OP_COMMA;
			break;
		case '!':
			if (!lexer_operator_accept(lexer, '=')) tok = TOK_OP_NOT;
			else tok = lexer_operator_accept(lexer, '=') ? TOK_OP_ISNOTIDENTICAL : TOK_OP_ISNOTEQUAL;
			break;
		case '^':
			tok = lexer_operator_accept(lexer, '=') ? TOK_OP_BIT_XOR_ASSIGN : TOK_OP_BIT_XOR;
			break;
		case '~':
			tok = lexer_operator_accept(lexer, '=') ? TOK_OP_PATTERN_MATCH : TOK_OP_BIT_NOT;
			break;
		case ':':
			tok = TOK_OP_COLON;
			break;
		case '{':
			tok = TOK_OP_OPEN_CURLYBRACE;
			break;
		case '}':
			tok = TOK_OP_CLOSED_CURLYBRACE;
			break;
		case '[':
			tok = TOK_OP_OPEN_SQUAREBRACKET;
			break;
		case ']':
			tok = TOK_OP_CLOSED_SQUAREBRACKET;
			break;
		case '(':
			tok = TOK_OP_OPEN_PARENTHESIS;
			break;
		case ')':
			tok = TOK_OP_CLOSED_PARENTHESIS;
			break;
		case '?':
			tok = TOK_OP_TERNARY;
			break;
		default:
			return lexer_error(lexer, "Unrecognized Operator");
	}

	TOKEN_FINALIZE(tok);

	DEBUG_LEXEM("Found operator: %s", token_name(tok));
	return tok;
}
505

506
// Consumes the single special character at the cursor ('@', see is_special)
// and returns TOK_SPECIAL.
static gtoken_t lexer_scan_special(gravity_lexer_t *lexer) {
	TOKEN_RESET;
	INC_TOKLEN;
	INC_OFFSET_POSITION;
	TOKEN_FINALIZE(TOK_SPECIAL);

	return TOK_SPECIAL;
}
514

515
// Consumes the single preprocessor marker at the cursor ('#', see
// is_preprocessor) and returns TOK_MACRO.
static gtoken_t lexer_scan_preprocessor(gravity_lexer_t *lexer) {
	TOKEN_RESET;
	INC_TOKLEN;
	INC_OFFSET_POSITION;
	TOKEN_FINALIZE(TOK_MACRO);

	return TOK_MACRO;
}
523

524
// MARK: -
525

526
gravity_lexer_t *gravity_lexer_create (const char *source, size_t len, uint32_t fileid, bool is_static) {
527
	gravity_lexer_t *lexer = mem_alloc(sizeof(gravity_lexer_t));
528
	if (!lexer) return NULL;
529
	bzero(lexer, sizeof(gravity_lexer_t));
530
	
531
	lexer->is_static = is_static;
532
	lexer->lineno = 1;
533
	lexer->buffer = source;
534
	lexer->length = (uint32_t)len;
535
	lexer->fileid = fileid;
536
	lexer->peeking = false;
537
	return lexer;
538
}
539

540
// Installs (or, with NULL, removes) the delegate whose parser_callback is
// notified about scanned tokens. The pointer is stored, not copied.
void gravity_lexer_setdelegate (gravity_lexer_t *lexer, gravity_delegate_t *delegate) {
	lexer->delegate = delegate;
}
543

544
// Returns the type of the next token without advancing the lexer.
// The whole lexer state (cursor, counters, current token) is snapshotted,
// gravity_lexer_next is run with the `peeking` flag set, and the snapshot is
// restored afterwards, so only the returned type survives the call.
gtoken_t gravity_lexer_peek (gravity_lexer_t *lexer) {
	lexer->peeking = true;
	gravity_lexer_t snapshot = *lexer;

	gtoken_t type = gravity_lexer_next(lexer);

	*lexer = snapshot;
	lexer->peeking = false;

	return type;
}
555

556
// Scans the next token and makes it the lexer's current token.
// Whitespace, line terminators and comments are skipped first.
// Returns the token type: TOK_EOF at end of input, TOK_ERROR when the
// character at the cursor cannot start any token.
// For every successfully dispatched token the delegate's parser callback is
// invoked (unless a peek is in progress); the TOK_EOF and "Unrecognized token"
// paths return without invoking it.
gtoken_t gravity_lexer_next (gravity_lexer_t *lexer) {
	int			c;
	gtoken_t	token;

loop:
	if (IS_EOF) return TOK_EOF;
	c = PEEK_CURRENT;

	// skip everything that is not part of a token
	if (is_whitespace(c)) {INC_OFFSET_POSITION; goto loop;}
	if (is_newline(lexer, c)) {INC_OFFSET_POSITION; INC_LINE; goto loop;}
	if (is_comment(c, PEEK_NEXT)) {lexer_scan_comment(lexer); goto loop;}

	// dispatch on the first character of the token
	if (is_semicolon(c)) {token = lexer_scan_semicolon(lexer); goto return_result;}
	if (is_alpha(c)) {token = lexer_scan_identifier(lexer); goto return_result;}
	if (is_digit(c, NUMBER_INTEGER)) {token = lexer_scan_number(lexer); goto return_result;}
	if (is_string(c)) {token = lexer_scan_string(lexer); goto return_result;}
	if (is_builtin_operator(c)) {token = lexer_scan_operator(lexer); goto return_result;}
	if (is_special(c)) {token = lexer_scan_special(lexer); goto return_result;}
	if (is_preprocessor(c)) {token = lexer_scan_preprocessor(lexer); goto return_result;}

	// start a fresh token so the error carries the location of the offending
	// character instead of the previous token's location
	TOKEN_RESET;
	return lexer_error(lexer, "Unrecognized token");

return_result:
	LEXER_CALL_CALLBACK();
	return token;
}
582

583
// Releases a lexer created by gravity_lexer_create. The source buffer is
// released as well unless the lexer was created with is_static == true.
// Passing NULL is a no-op.
void gravity_lexer_free (gravity_lexer_t *lexer) {
	if (!lexer) return;
	// buffer is stored as const char *; drop the qualifier only to release it
	if ((!lexer->is_static) && (lexer->buffer)) mem_free((void *)lexer->buffer);
	mem_free(lexer);
}
587

588
// Returns a copy of the current (most recently scanned) token.
gtoken_s gravity_lexer_token (gravity_lexer_t *lexer) {
	return lexer->token;
}
591

592
// Returns a copy of the current token whose location fields (lineno, colno,
// position) are replaced by the lexer's current cursor location, i.e. the
// place where scanning will resume.
gtoken_s gravity_lexer_token_next (gravity_lexer_t *lexer) {
	gtoken_s relocated = lexer->token;
	relocated.lineno = lexer->lineno;
	relocated.colno = lexer->colno;
	relocated.position = lexer->position;
	return relocated;
}
599

600
// Returns the type of the current (most recently scanned) token.
gtoken_t gravity_lexer_token_type (gravity_lexer_t *lexer) {
	return lexer->token.type;
}
603

604
// Prints a one-line description of `token` to stdout:
// "(line, col) TYPE: text<TAB>(offset: position len:bytes)".
// The numeric fields are cast to int because that is what the %d and %.*s
// conversions require (the fields are assigned from uint32_t values elsewhere
// in this file).
void gravity_lexer_token_dump (gtoken_s token) {
	printf("(%02d, %02d) %s: ", (int)token.lineno, (int)token.colno, token_name(token.type));
	printf("%.*s\t(offset: %d len:%d)\n", (int)token.bytes, token.value, (int)token.position, (int)token.bytes);
}
608

609
#if GRAVITY_LEXER_DEGUB
// Prints the lexer's current token to stdout as
// "(line, col) TYPE: text<TAB>(offset: position)".
// Nothing is printed while peeking or when the token has no location yet
// (lineno and colno both 0).
// The numeric fields are cast to int because that is what the %d and %.*s
// conversions require.
void gravity_lexer_debug (gravity_lexer_t *lexer) {
	if (lexer->peeking) return;
	gtoken_s token = lexer->token;
	if ((token.lineno == 0) && (token.colno == 0)) return;
	printf("(%02d, %02d) %s: ", (int)token.lineno, (int)token.colno, token_name(token.type));
	printf("%.*s\t(offset: %d)\n", (int)token.bytes, token.value, (int)token.position);
}
#endif
621

622
Product

Resources

Company