CoCalc -- lexer.rs

GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/reader/src/lexer.rs
²⁴⁵⁰ views
1
//! Lexical analysis for .clif files.
2

3
use crate::error::Location;
4
use cranelift_codegen::ir::types;
5
use cranelift_codegen::ir::{Block, Value};
6
use std::str::CharIndices;
7
use std::u16;
8

9
/// A Token returned from the `Lexer`.
10
///
11
/// Some variants may contains references to the original source text, so the `Token` has the same
12
/// lifetime as the source.
13
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
14
pub enum Token<'a> {
15
    Comment(&'a str),
16
    LPar,                   // '('
17
    RPar,                   // ')'
18
    LBrace,                 // '{'
19
    RBrace,                 // '}'
20
    LBracket,               // '['
21
    RBracket,               // ']'
22
    LAngle,                 // '<'
23
    RAngle,                 // '>'
24
    Minus,                  // '-'
25
    Plus,                   // '+'
26
    Multiply,               // '*'
27
    Comma,                  // ','
28
    Dot,                    // '.'
29
    Colon,                  // ':'
30
    Equal,                  // '='
31
    Bang,                   // '!'
32
    At,                     // '@'
33
    Arrow,                  // '->'
34
    Float(&'a str),         // Floating point immediate
35
    Integer(&'a str),       // Integer immediate
36
    Type(types::Type),      // i32, f32, i32x4, ...
37
    DynamicType(u32),       // dt5
38
    Value(Value),           // v12, v7
39
    Block(Block),           // block3
40
    Cold,                   // cold (flag on block)
41
    StackSlot(u32),         // ss3
42
    DynamicStackSlot(u32),  // dss4
43
    GlobalValue(u32),       // gv3
44
    MemoryType(u32),        // mt0
45
    Constant(u32),          // const2
46
    FuncRef(u32),           // fn2
47
    SigRef(u32),            // sig2
48
    UserRef(u32),           // u345
49
    UserNameRef(u32),       // userextname345
50
    ExceptionTableRef(u32), // ex123
51
    ExceptionTag(u32),      // tag123
52
    TryCallRet(u32),        // ret123
53
    TryCallExn(u32),        // exn123
54
    Name(&'a str),          // %9arbitrary_alphanum, %x3, %0, %function ...
55
    String(&'a str),        // "arbitrary quoted string with no escape" ...
56
    HexSequence(&'a str),   // #89AF
57
    Identifier(&'a str),    // Unrecognized identifier (opcode, enumerator, ...)
58
    SourceLoc(&'a str),     // @00c7
59
}
60

61
/// A `Token` with an associated location.
62
#[derive(Debug, PartialEq, Eq)]
63
pub struct LocatedToken<'a> {
64
    pub token: Token<'a>,
65
    pub location: Location,
66
}
67

68
/// Wrap up a `Token` with the given location.
69
fn token(token: Token, loc: Location) -> Result<LocatedToken, LocatedError> {
70
    Ok(LocatedToken {
71
        token,
72
        location: loc,
73
    })
74
}
75

76
/// The kinds of failure the lexical analysis can report.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexError {
    /// A character that cannot start any token was found, or a quoted string was left
    /// unterminated.
    InvalidChar,
}
81

82
/// A `LexError` with an associated Location.
83
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
84
pub struct LocatedError {
85
    pub error: LexError,
86
    pub location: Location,
87
}
88

89
/// Wrap up a `LexError` with the given location.
90
fn error<'a>(error: LexError, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
91
    Err(LocatedError {
92
        error,
93
        location: loc,
94
    })
95
}
96

97
/// Count the ASCII decimal digits that terminate `s`.
fn trailing_digits(s: &str) -> usize {
    // Walking the raw bytes from the back avoids decoding chars, and is sufficient because
    // only ASCII digits are of interest.
    s.bytes().rev().take_while(u8::is_ascii_digit).count()
}
106

107
/// Pre-parse a supposed entity name by splitting it into two parts: A head of lowercase ASCII
108
/// letters and numeric tail.
109
pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
110
    let (head, tail) = name.split_at(name.len() - trailing_digits(name));
111
    if tail.len() > 1 && tail.starts_with('0') {
112
        None
113
    } else {
114
        tail.parse().ok().map(|n| (head, n))
115
    }
116
}
117

118
/// Lexical analysis.
///
/// A `Lexer` walks over a `&str` and hands out its tokens one at a time. It also tracks the
/// current line number so that tokens and errors can be located.
pub struct Lexer<'a> {
    /// The complete text being lexed.
    source: &'a str,

    /// Iterator over the characters of `source` that follow the lookahead.
    chars: CharIndices<'a>,

    /// The next character to be processed, or `None` once the end has been reached.
    lookahead: Option<char>,

    /// Byte index into `source` of the lookahead character.
    pos: usize,

    /// The current line number.
    line_number: usize,
}
140

141
impl<'a> Lexer<'a> {
142
    /// Create a lexer over `s`, positioned on its first character (if any) at line 1.
    pub fn new(s: &'a str) -> Self {
        let mut lexer = Self {
            line_number: 1,
            pos: 0,
            lookahead: None,
            chars: s.char_indices(),
            source: s,
        };
        // Prime the lookahead with the first character of the source.
        lexer.next_ch();
        lexer
    }
154

155
    // Advance to the next character.
156
    // Return the next lookahead character, or None when the end is encountered.
157
    // Always update cur_ch to reflect
158
    fn next_ch(&mut self) -> Option<char> {
159
        if self.lookahead == Some('\n') {
160
            self.line_number += 1;
161
        }
162
        match self.chars.next() {
163
            Some((idx, ch)) => {
164
                self.pos = idx;
165
                self.lookahead = Some(ch);
166
            }
167
            None => {
168
                self.pos = self.source.len();
169
                self.lookahead = None;
170
            }
171
        }
172
        self.lookahead
173
    }
174

175
    // Get the location corresponding to `lookahead`.
176
    fn loc(&self) -> Location {
177
        Location {
178
            line_number: self.line_number,
179
        }
180
    }
181

182
    // Starting from `lookahead`, are we looking at `prefix`?
183
    fn looking_at(&self, prefix: &str) -> bool {
184
        self.source[self.pos..].starts_with(prefix)
185
    }
186

187
    // Starting from `lookahead`, are we looking at a number?
188
    fn looking_at_numeric(&self) -> bool {
189
        if let Some(c) = self.lookahead {
190
            match c {
191
                '0'..='9' => return true,
192
                '-' => return true,
193
                '+' => return true,
194
                '.' => return true,
195
                _ => {}
196
            }
197
            if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") {
198
                return true;
199
            }
200
        }
201
        false
202
    }
203

204
    // Scan a single-char token.
205
    fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> {
206
        assert_ne!(self.lookahead, None);
207
        let loc = self.loc();
208
        self.next_ch();
209
        token(tok, loc)
210
    }
211

212
    // Scan a multi-char token.
213
    fn scan_chars(
214
        &mut self,
215
        count: usize,
216
        tok: Token<'a>,
217
    ) -> Result<LocatedToken<'a>, LocatedError> {
218
        let loc = self.loc();
219
        for _ in 0..count {
220
            assert_ne!(self.lookahead, None);
221
            self.next_ch();
222
        }
223
        token(tok, loc)
224
    }
225

226
    /// Get the rest of the current line.
227
    /// The next token returned by `next()` will be from the following lines.
228
    pub fn rest_of_line(&mut self) -> &'a str {
229
        let begin = self.pos;
230
        loop {
231
            match self.next_ch() {
232
                None | Some('\n') => return &self.source[begin..self.pos],
233
                _ => {}
234
            }
235
        }
236
    }
237

238
    // Scan a comment extending to the end of the current line.
239
    fn scan_comment(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
240
        let loc = self.loc();
241
        let text = self.rest_of_line();
242
        token(Token::Comment(text), loc)
243
    }
244

245
    // Scan a number token which can represent either an integer or floating point number.
246
    //
247
    // Accept the following forms:
248
    //
249
    // - `10`: Integer
250
    // - `-10`: Integer
251
    // - `0xff_00`: Integer
252
    // - `0.0`: Float
253
    // - `0x1.f`: Float
254
    // - `-0x2.4`: Float
255
    // - `0x0.4p-34`: Float
256
    //
257
    // This function does not filter out all invalid numbers. It depends in the context-sensitive
258
    // decoding of the text for that. For example, the number of allowed digits in an `Ieee32` and
259
    // an `Ieee64` constant are different.
260
    fn scan_number(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
261
        let begin = self.pos;
262
        let loc = self.loc();
263
        let mut is_float = false;
264

265
        // Skip a leading sign.
266
        match self.lookahead {
267
            Some('-') => {
268
                self.next_ch();
269
                if !self.looking_at_numeric() {
270
                    // If the next characters won't parse as a number, we return Token::Minus
271
                    return token(Token::Minus, loc);
272
                }
273
            }
274
            Some('+') => {
275
                self.next_ch();
276
                if !self.looking_at_numeric() {
277
                    // If the next characters won't parse as a number, we return Token::Plus
278
                    return token(Token::Plus, loc);
279
                }
280
            }
281
            _ => {}
282
        }
283

284
        // Check for NaNs with payloads.
285
        if self.looking_at("NaN:") || self.looking_at("sNaN:") {
286
            // Skip the `NaN:` prefix, the loop below won't accept it.
287
            // We expect a hexadecimal number to follow the colon.
288
            while self.next_ch() != Some(':') {}
289
            is_float = true;
290
        } else if self.looking_at("NaN") || self.looking_at("Inf") {
291
            // This is Inf or a default quiet NaN.
292
            is_float = true;
293
        }
294

295
        // Look for the end of this number. Detect the radix point if there is one.
296
        loop {
297
            match self.next_ch() {
298
                Some('-') | Some('_') => {}
299
                Some('.') => is_float = true,
300
                Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
301
                _ => break,
302
            }
303
        }
304
        let text = &self.source[begin..self.pos];
305
        if is_float {
306
            token(Token::Float(text), loc)
307
        } else {
308
            token(Token::Integer(text), loc)
309
        }
310
    }
311

312
    // Scan a 'word', which is an identifier-like sequence of characters beginning with '_' or an
313
    // alphabetic char, followed by zero or more alphanumeric or '_' characters.
314
    fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
315
        let begin = self.pos;
316
        let loc = self.loc();
317

318
        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
319
        loop {
320
            match self.next_ch() {
321
                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
322
                _ => break,
323
            }
324
        }
325
        let text = &self.source[begin..self.pos];
326

327
        // Look for numbered well-known entities like block15, v45, ...
328
        token(
329
            split_entity_name(text)
330
                .and_then(|(prefix, number)| {
331
                    Self::numbered_entity(prefix, number)
332
                        .or_else(|| Self::value_type(text, prefix, number))
333
                })
334
                .unwrap_or_else(|| match text {
335
                    "cold" => Token::Cold,
336
                    _ => Token::Identifier(text),
337
                }),
338
            loc,
339
        )
340
    }
341

342
    // If prefix is a well-known entity prefix and suffix is a valid entity number, return the
343
    // decoded token.
344
    fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
345
        match prefix {
346
            "v" => Value::with_number(number).map(Token::Value),
347
            "block" => Block::with_number(number).map(Token::Block),
348
            "ss" => Some(Token::StackSlot(number)),
349
            "dss" => Some(Token::DynamicStackSlot(number)),
350
            "dt" => Some(Token::DynamicType(number)),
351
            "gv" => Some(Token::GlobalValue(number)),
352
            "mt" => Some(Token::MemoryType(number)),
353
            "const" => Some(Token::Constant(number)),
354
            "fn" => Some(Token::FuncRef(number)),
355
            "sig" => Some(Token::SigRef(number)),
356
            "u" => Some(Token::UserRef(number)),
357
            "userextname" => Some(Token::UserNameRef(number)),
358
            "extable" => Some(Token::ExceptionTableRef(number)),
359
            "tag" => Some(Token::ExceptionTag(number)),
360
            "ret" => Some(Token::TryCallRet(number)),
361
            "exn" => Some(Token::TryCallExn(number)),
362
            _ => None,
363
        }
364
    }
365

366
    // Recognize a scalar or vector type.
367
    fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
368
        let is_vector = prefix.ends_with('x');
369
        let scalar = if is_vector {
370
            &prefix[0..prefix.len() - 1]
371
        } else {
372
            text
373
        };
374
        let base_type = match scalar {
375
            "i8" => types::I8,
376
            "i16" => types::I16,
377
            "i32" => types::I32,
378
            "i64" => types::I64,
379
            "i128" => types::I128,
380
            "f16" => types::F16,
381
            "f32" => types::F32,
382
            "f64" => types::F64,
383
            "f128" => types::F128,
384
            _ => return None,
385
        };
386
        if is_vector {
387
            if number <= u32::from(u16::MAX) {
388
                base_type.by(number).map(Token::Type)
389
            } else {
390
                None
391
            }
392
        } else {
393
            Some(Token::Type(base_type))
394
        }
395
    }
396

397
    fn scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
398
        let loc = self.loc();
399
        let begin = self.pos + 1;
400

401
        assert_eq!(self.lookahead, Some('%'));
402

403
        loop {
404
            match self.next_ch() {
405
                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
406
                _ => break,
407
            }
408
        }
409

410
        let end = self.pos;
411
        token(Token::Name(&self.source[begin..end]), loc)
412
    }
413

414
    /// Scan for a multi-line quoted string with no escape character.
415
    fn scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
416
        let loc = self.loc();
417
        let begin = self.pos + 1;
418

419
        assert_eq!(self.lookahead, Some('"'));
420

421
        while let Some(c) = self.next_ch() {
422
            if c == '"' {
423
                break;
424
            }
425
        }
426

427
        let end = self.pos;
428
        if self.lookahead != Some('"') {
429
            return error(LexError::InvalidChar, self.loc());
430
        }
431
        self.next_ch();
432
        token(Token::String(&self.source[begin..end]), loc)
433
    }
434

435
    fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
436
        let loc = self.loc();
437
        let begin = self.pos + 1;
438

439
        assert_eq!(self.lookahead, Some('#'));
440

441
        while let Some(c) = self.next_ch() {
442
            if !char::is_digit(c, 16) {
443
                break;
444
            }
445
        }
446

447
        let end = self.pos;
448
        token(Token::HexSequence(&self.source[begin..end]), loc)
449
    }
450

451
    /// Given that we've consumed an `@` character, are we looking at a source
452
    /// location?
453
    fn looking_at_srcloc(&self) -> bool {
454
        match self.lookahead {
455
            Some(c) => char::is_digit(c, 16),
456
            _ => false,
457
        }
458
    }
459

460
    fn scan_srcloc(&mut self, pos: usize, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
461
        let begin = pos + 1;
462
        while let Some(c) = self.next_ch() {
463
            if !char::is_digit(c, 16) {
464
                break;
465
            }
466
        }
467

468
        let end = self.pos;
469
        token(Token::SourceLoc(&self.source[begin..end]), loc)
470
    }
471

472
    /// Get the next token or a lexical error.
473
    ///
474
    /// Return None when the end of the source is encountered.
475
    pub fn next(&mut self) -> Option<Result<LocatedToken<'a>, LocatedError>> {
476
        loop {
477
            let loc = self.loc();
478
            return match self.lookahead {
479
                None => None,
480
                Some(';') => Some(self.scan_comment()),
481
                Some('(') => Some(self.scan_char(Token::LPar)),
482
                Some(')') => Some(self.scan_char(Token::RPar)),
483
                Some('{') => Some(self.scan_char(Token::LBrace)),
484
                Some('}') => Some(self.scan_char(Token::RBrace)),
485
                Some('[') => Some(self.scan_char(Token::LBracket)),
486
                Some(']') => Some(self.scan_char(Token::RBracket)),
487
                Some('<') => Some(self.scan_char(Token::LAngle)),
488
                Some('>') => Some(self.scan_char(Token::RAngle)),
489
                Some(',') => Some(self.scan_char(Token::Comma)),
490
                Some('.') => Some(self.scan_char(Token::Dot)),
491
                Some(':') => Some(self.scan_char(Token::Colon)),
492
                Some('=') => Some(self.scan_char(Token::Equal)),
493
                Some('!') => Some(self.scan_char(Token::Bang)),
494
                Some('+') => Some(self.scan_number()),
495
                Some('*') => Some(self.scan_char(Token::Multiply)),
496
                Some('-') => {
497
                    if self.looking_at("->") {
498
                        Some(self.scan_chars(2, Token::Arrow))
499
                    } else {
500
                        Some(self.scan_number())
501
                    }
502
                }
503
                Some('0'..='9') => Some(self.scan_number()),
504
                Some('a'..='z') | Some('A'..='Z') => {
505
                    if self.looking_at("NaN") || self.looking_at("Inf") {
506
                        Some(self.scan_number())
507
                    } else {
508
                        Some(self.scan_word())
509
                    }
510
                }
511
                Some('%') => Some(self.scan_name()),
512
                Some('"') => Some(self.scan_string()),
513
                Some('#') => Some(self.scan_hex_sequence()),
514
                Some('@') => {
515
                    let pos = self.pos;
516
                    let loc = self.loc();
517
                    self.next_ch();
518
                    if self.looking_at_srcloc() {
519
                        Some(self.scan_srcloc(pos, loc))
520
                    } else {
521
                        Some(token(Token::At, loc))
522
                    }
523
                }
524
                // all ascii whitespace
525
                Some(' ') | Some('\x09'..='\x0d') => {
526
                    self.next_ch();
527
                    continue;
528
                }
529
                _ => {
530
                    // Skip invalid char, return error.
531
                    self.next_ch();
532
                    Some(error(LexError::InvalidChar, loc))
533
                }
534
            };
535
        }
536
    }
537
}
538

539
#[cfg(test)]
540
mod tests {
541
    use super::*;
542

543
    // `trailing_digits` counts only the decimal digits at the very end of its input.
    #[test]
    fn digits() {
        let cases: [(&str, usize); 6] = [
            ("", 0),
            ("x", 0),
            ("0x", 0),
            ("x1", 1),
            ("1x1", 1),
            ("1x01", 2),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(trailing_digits(input), expected);
        }
    }
552

553
    // `split_entity_name` splits off a canonical numeric tail and leaves the head unvalidated.
    #[test]
    fn entity_name() {
        let cases: [(&str, Option<(&str, u32)>); 9] = [
            ("", None),
            ("x", None),
            ("x+", None),
            ("x+1", Some(("x+", 1))),
            ("x-1", Some(("x-", 1))),
            ("1", Some(("", 1))),
            ("x1", Some(("x", 1))),
            ("xy0", Some(("xy", 0))),
            // Reject this non-canonical form.
            ("inst01", None),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(split_entity_name(input), expected);
        }
    }
566

567
    fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
568
        Some(super::token(token, Location { line_number: line }))
569
    }
570

571
    fn error<'a>(error: LexError, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
572
        Some(super::error(error, Location { line_number: line }))
573
    }
574

575
    // Empty and whitespace-only sources produce no tokens at all.
    #[test]
    fn make_lexer() {
        for &source in ["", " ", "\n "].iter() {
            let mut lex = Lexer::new(source);
            assert_eq!(lex.next(), None);
        }
    }
585

586
    // Comments extend to the end of their line and are located on the line they start on.
    #[test]
    fn lex_comment() {
        let scenarios = vec![
            ("; hello", vec![token(Token::Comment("; hello"), 1)]),
            (
                "\n  ;hello\n;foo",
                vec![
                    token(Token::Comment(";hello"), 2),
                    token(Token::Comment(";foo"), 3),
                ],
            ),
            // Scan a comment after an invalid char.
            (
                "$; hello",
                vec![
                    error(LexError::InvalidChar, 1),
                    token(Token::Comment("; hello"), 1),
                ],
            ),
        ];
        for (source, expected) in scenarios {
            let mut lex = Lexer::new(source);
            for item in expected {
                assert_eq!(lex.next(), item);
            }
            assert_eq!(lex.next(), None);
        }
    }
603

604
    // Punctuation tokens, interleaved with a comment and a line break.
    #[test]
    fn lex_chars() {
        let mut lex = Lexer::new("(); hello\n = :{, }.");
        let expected = [
            (Token::LPar, 1),
            (Token::RPar, 1),
            (Token::Comment("; hello"), 1),
            (Token::Equal, 2),
            (Token::Colon, 2),
            (Token::LBrace, 2),
            (Token::Comma, 2),
            (Token::RBrace, 2),
            (Token::Dot, 2),
        ];
        for &(tok, line) in expected.iter() {
            assert_eq!(lex.next(), token(tok, line));
        }
        assert_eq!(lex.next(), None);
    }
618

619
    // Integer and floating point immediates in their various spellings.
    #[test]
    fn lex_numbers() {
        let mut lex = Lexer::new(" 0 2_000 -1,0xf -0x0 0.0 0x0.4p-34 NaN +5");
        let expected = [
            Token::Integer("0"),
            Token::Integer("2_000"),
            Token::Integer("-1"),
            Token::Comma,
            Token::Integer("0xf"),
            Token::Integer("-0x0"),
            Token::Float("0.0"),
            Token::Float("0x0.4p-34"),
            Token::Float("NaN"),
            Token::Integer("+5"),
        ];
        for &tok in expected.iter() {
            assert_eq!(lex.next(), token(tok, 1));
        }
        assert_eq!(lex.next(), None);
    }
634

635
    // Words are classified as entities, types or plain identifiers.
    #[test]
    fn lex_identifiers() {
        let mut lex = Lexer::new(
            "v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \
             function0 function i8 i32x4 f32x5 f16 f128",
        );
        let expected = [
            Token::Value(Value::with_number(0).unwrap()),
            Token::Identifier("v00"),
            Token::Identifier("vx01"),
            Token::Block(Block::with_number(1234567890).unwrap()),
            Token::Identifier("block5234567890"),
            Token::Identifier("v1x"),
            Token::Identifier("vx1"),
            Token::Identifier("vxvx4"),
            Token::Identifier("function0"),
            Token::Identifier("function"),
            Token::Type(types::I8),
            Token::Type(types::I32X4),
            Token::Identifier("f32x5"),
            Token::Type(types::F16),
            Token::Type(types::F128),
        ];
        for &tok in expected.iter() {
            assert_eq!(lex.next(), token(tok, 1));
        }
        assert_eq!(lex.next(), None);
    }
664

665
    // Hex sequences are returned without their leading `#`.
    #[test]
    fn lex_hex_sequences() {
        let mut lex = Lexer::new("#0 #DEADbeef123 #789");

        for &digits in ["0", "DEADbeef123", "789"].iter() {
            assert_eq!(lex.next(), token(Token::HexSequence(digits), 1));
        }
    }
673

674
    // Names are returned without their leading `%`, even when they look like entities.
    #[test]
    fn lex_names() {
        let mut lex = Lexer::new("%0 %x3 %function %123_abc %ss0 %v3 %block11 %const42 %_");

        let expected = [
            "0", "x3", "function", "123_abc", "ss0", "v3", "block11", "const42", "_",
        ];
        for &name in expected.iter() {
            assert_eq!(lex.next(), token(Token::Name(name), 1));
        }
    }
688

689
    // Quoted strings have no escapes, may span lines, and are located at their opening quote.
    #[test]
    fn lex_strings() {
        let source = r#"""  "0" "x3""function" "123 abc" "\" "start
                    and end on
                    different lines" "#;
        let multi_line = r#"start
                    and end on
                    different lines"#;
        let mut lex = Lexer::new(source);

        let expected = ["", "0", "x3", "function", "123 abc", r#"\"#, multi_line];
        for &text in expected.iter() {
            assert_eq!(lex.next(), token(Token::String(text), 1));
        }
    }
715

716
    // User references, including one directly followed by a colon and an integer.
    #[test]
    fn lex_userrefs() {
        let mut lex = Lexer::new("u0 u1 u234567890 u9:8765");

        let expected = [
            Token::UserRef(0),
            Token::UserRef(1),
            Token::UserRef(234567890),
            Token::UserRef(9),
            Token::Colon,
            Token::Integer("8765"),
        ];
        for &tok in expected.iter() {
            assert_eq!(lex.next(), token(tok, 1));
        }
        assert_eq!(lex.next(), None);
    }
728
}
729

730
Product

Resources

Company