CoCalc -- parse.rs

GitHub Repository: torvalds/linux
Path: blob/master/rust/proc-macro2/parse.rs
⁴⁹¹⁸¹ views
1
// SPDX-License-Identifier: Apache-2.0 OR MIT
2

3
use crate::fallback::{
4
    self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream,
5
    TokenStreamBuilder,
6
};
7
use crate::{Delimiter, Punct, Spacing, TokenTree};
8
use core::char;
9
use core::str::{Bytes, CharIndices, Chars};
10

11
/// Read position inside the source text being lexed.
///
/// The type is `Copy`: parsers receive a cursor by value and hand back a new
/// cursor for the input they did not consume.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    /// The part of the input that has not been consumed yet.
    pub(crate) rest: &'a str,
    /// Position of `rest`, counted in characters; only present when
    /// `span_locations` is enabled.
    #[cfg(span_locations)]
    pub(crate) off: u32,
}
17

18
impl<'a> Cursor<'a> {
19
    pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
20
        let (_front, rest) = self.rest.split_at(bytes);
21
        Cursor {
22
            rest,
23
            #[cfg(span_locations)]
24
            off: self.off + _front.chars().count() as u32,
25
        }
26
    }
27

28
    pub(crate) fn starts_with(&self, s: &str) -> bool {
29
        self.rest.starts_with(s)
30
    }
31

32
    pub(crate) fn starts_with_char(&self, ch: char) -> bool {
33
        self.rest.starts_with(ch)
34
    }
35

36
    pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
37
    where
38
        Pattern: FnMut(char) -> bool,
39
    {
40
        self.rest.starts_with(f)
41
    }
42

43
    pub(crate) fn is_empty(&self) -> bool {
44
        self.rest.is_empty()
45
    }
46

47
    fn len(&self) -> usize {
48
        self.rest.len()
49
    }
50

51
    fn as_bytes(&self) -> &'a [u8] {
52
        self.rest.as_bytes()
53
    }
54

55
    fn bytes(&self) -> Bytes<'a> {
56
        self.rest.bytes()
57
    }
58

59
    fn chars(&self) -> Chars<'a> {
60
        self.rest.chars()
61
    }
62

63
    fn char_indices(&self) -> CharIndices<'a> {
64
        self.rest.char_indices()
65
    }
66

67
    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
68
        if self.starts_with(tag) {
69
            Ok(self.advance(tag.len()))
70
        } else {
71
            Err(Reject)
72
        }
73
    }
74
}
75

76
/// Marker error returned by the parsers in this file when the input at the
/// cursor does not match what they look for. It carries no data.
pub(crate) struct Reject;
77
/// Result of a parser that produces a value `O`: on success the cursor for
/// the remaining input paired with the value, otherwise `Reject`.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
78

79
fn skip_whitespace(input: Cursor) -> Cursor {
80
    let mut s = input;
81

82
    while !s.is_empty() {
83
        let byte = s.as_bytes()[0];
84
        if byte == b'/' {
85
            if s.starts_with("//")
86
                && (!s.starts_with("///") || s.starts_with("////"))
87
                && !s.starts_with("//!")
88
            {
89
                let (cursor, _) = take_until_newline_or_eof(s);
90
                s = cursor;
91
                continue;
92
            } else if s.starts_with("/**/") {
93
                s = s.advance(4);
94
                continue;
95
            } else if s.starts_with("/*")
96
                && (!s.starts_with("/**") || s.starts_with("/***"))
97
                && !s.starts_with("/*!")
98
            {
99
                match block_comment(s) {
100
                    Ok((rest, _)) => {
101
                        s = rest;
102
                        continue;
103
                    }
104
                    Err(Reject) => return s,
105
                }
106
            }
107
        }
108
        match byte {
109
            b' ' | 0x09..=0x0d => {
110
                s = s.advance(1);
111
                continue;
112
            }
113
            b if b.is_ascii() => {}
114
            _ => {
115
                let ch = s.chars().next().unwrap();
116
                if is_whitespace(ch) {
117
                    s = s.advance(ch.len_utf8());
118
                    continue;
119
                }
120
            }
121
        }
122
        return s;
123
    }
124
    s
125
}
126

127
fn block_comment(input: Cursor) -> PResult<&str> {
128
    if !input.starts_with("/*") {
129
        return Err(Reject);
130
    }
131

132
    let mut depth = 0usize;
133
    let bytes = input.as_bytes();
134
    let mut i = 0usize;
135
    let upper = bytes.len() - 1;
136

137
    while i < upper {
138
        if bytes[i] == b'/' && bytes[i + 1] == b'*' {
139
            depth += 1;
140
            i += 1; // eat '*'
141
        } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
142
            depth -= 1;
143
            if depth == 0 {
144
                return Ok((input.advance(i + 2), &input.rest[..i + 2]));
145
            }
146
            i += 1; // eat '/'
147
        }
148
        i += 1;
149
    }
150

151
    Err(Reject)
152
}
153

154
/// Reports whether `ch` counts as whitespace for this lexer.
fn is_whitespace(ch: char) -> bool {
    // Besides `char::is_whitespace`, Rust also treats the left-to-right mark
    // (U+200E) and the right-to-left mark (U+200F) as whitespace.
    matches!(ch, '\u{200e}' | '\u{200f}') || ch.is_whitespace()
}
158

159
fn word_break(input: Cursor) -> Result<Cursor, Reject> {
160
    match input.chars().next() {
161
        Some(ch) if is_ident_continue(ch) => Err(Reject),
162
        Some(_) | None => Ok(input),
163
    }
164
}
165

166
// Rustc's representation of a macro expansion error in expression position or
167
// type position.
168
// `token_stream` does not open a parenthesized group when the input starts
// with this text, and `leaf_token` lexes it as one literal token instead.
const ERROR: &str = "(/*ERROR*/)";
169

170
pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
171
    let mut trees = TokenStreamBuilder::new();
172
    let mut stack = Vec::new();
173

174
    loop {
175
        input = skip_whitespace(input);
176

177
        if let Ok((rest, ())) = doc_comment(input, &mut trees) {
178
            input = rest;
179
            continue;
180
        }
181

182
        #[cfg(span_locations)]
183
        let lo = input.off;
184

185
        let first = match input.bytes().next() {
186
            Some(first) => first,
187
            None => match stack.last() {
188
                None => return Ok(trees.build()),
189
                #[cfg(span_locations)]
190
                Some((lo, _frame)) => {
191
                    return Err(LexError {
192
                        span: Span { lo: *lo, hi: *lo },
193
                    })
194
                }
195
                #[cfg(not(span_locations))]
196
                Some(_frame) => return Err(LexError { span: Span {} }),
197
            },
198
        };
199

200
        if let Some(open_delimiter) = match first {
201
            b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
202
            b'[' => Some(Delimiter::Bracket),
203
            b'{' => Some(Delimiter::Brace),
204
            _ => None,
205
        } {
206
            input = input.advance(1);
207
            let frame = (open_delimiter, trees);
208
            #[cfg(span_locations)]
209
            let frame = (lo, frame);
210
            stack.push(frame);
211
            trees = TokenStreamBuilder::new();
212
        } else if let Some(close_delimiter) = match first {
213
            b')' => Some(Delimiter::Parenthesis),
214
            b']' => Some(Delimiter::Bracket),
215
            b'}' => Some(Delimiter::Brace),
216
            _ => None,
217
        } {
218
            let frame = match stack.pop() {
219
                Some(frame) => frame,
220
                None => return Err(lex_error(input)),
221
            };
222
            #[cfg(span_locations)]
223
            let (lo, frame) = frame;
224
            let (open_delimiter, outer) = frame;
225
            if open_delimiter != close_delimiter {
226
                return Err(lex_error(input));
227
            }
228
            input = input.advance(1);
229
            let mut g = Group::new(open_delimiter, trees.build());
230
            g.set_span(Span {
231
                #[cfg(span_locations)]
232
                lo,
233
                #[cfg(span_locations)]
234
                hi: input.off,
235
            });
236
            trees = outer;
237
            trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
238
        } else {
239
            let (rest, mut tt) = match leaf_token(input) {
240
                Ok((rest, tt)) => (rest, tt),
241
                Err(Reject) => return Err(lex_error(input)),
242
            };
243
            tt.set_span(crate::Span::_new_fallback(Span {
244
                #[cfg(span_locations)]
245
                lo,
246
                #[cfg(span_locations)]
247
                hi: rest.off,
248
            }));
249
            trees.push_token_from_parser(tt);
250
            input = rest;
251
        }
252
    }
253
}
254

255
fn lex_error(cursor: Cursor) -> LexError {
256
    #[cfg(not(span_locations))]
257
    let _ = cursor;
258
    LexError {
259
        span: Span {
260
            #[cfg(span_locations)]
261
            lo: cursor.off,
262
            #[cfg(span_locations)]
263
            hi: cursor.off,
264
        },
265
    }
266
}
267

268
fn leaf_token(input: Cursor) -> PResult<TokenTree> {
269
    if let Ok((input, l)) = literal(input) {
270
        // must be parsed before ident
271
        Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
272
    } else if let Ok((input, p)) = punct(input) {
273
        Ok((input, TokenTree::Punct(p)))
274
    } else if let Ok((input, i)) = ident(input) {
275
        Ok((input, TokenTree::Ident(i)))
276
    } else if input.starts_with(ERROR) {
277
        let rest = input.advance(ERROR.len());
278
        let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
279
        Ok((rest, TokenTree::Literal(repr)))
280
    } else {
281
        Err(Reject)
282
    }
283
}
284

285
fn ident(input: Cursor) -> PResult<crate::Ident> {
286
    if [
287
        "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
288
    ]
289
    .iter()
290
    .any(|prefix| input.starts_with(prefix))
291
    {
292
        Err(Reject)
293
    } else {
294
        ident_any(input)
295
    }
296
}
297

298
fn ident_any(input: Cursor) -> PResult<crate::Ident> {
299
    let raw = input.starts_with("r#");
300
    let rest = input.advance((raw as usize) << 1);
301

302
    let (rest, sym) = ident_not_raw(rest)?;
303

304
    if !raw {
305
        let ident =
306
            crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
307
        return Ok((rest, ident));
308
    }
309

310
    match sym {
311
        "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
312
        _ => {}
313
    }
314

315
    let ident =
316
        crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
317
    Ok((rest, ident))
318
}
319

320
fn ident_not_raw(input: Cursor) -> PResult<&str> {
321
    let mut chars = input.char_indices();
322

323
    match chars.next() {
324
        Some((_, ch)) if is_ident_start(ch) => {}
325
        _ => return Err(Reject),
326
    }
327

328
    let mut end = input.len();
329
    for (i, ch) in chars {
330
        if !is_ident_continue(ch) {
331
            end = i;
332
            break;
333
        }
334
    }
335

336
    Ok((input.advance(end), &input.rest[..end]))
337
}
338

339
pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
340
    let rest = literal_nocapture(input)?;
341
    let end = input.len() - rest.len();
342
    Ok((rest, Literal::_new(input.rest[..end].to_string())))
343
}
344

345
fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
346
    if let Ok(ok) = string(input) {
347
        Ok(ok)
348
    } else if let Ok(ok) = byte_string(input) {
349
        Ok(ok)
350
    } else if let Ok(ok) = c_string(input) {
351
        Ok(ok)
352
    } else if let Ok(ok) = byte(input) {
353
        Ok(ok)
354
    } else if let Ok(ok) = character(input) {
355
        Ok(ok)
356
    } else if let Ok(ok) = float(input) {
357
        Ok(ok)
358
    } else if let Ok(ok) = int(input) {
359
        Ok(ok)
360
    } else {
361
        Err(Reject)
362
    }
363
}
364

365
/// Skips an optional literal suffix (an identifier directly following the
/// literal); returns `input` unchanged when no identifier follows.
fn literal_suffix(input: Cursor) -> Cursor {
    if let Ok((after_suffix, _)) = ident_not_raw(input) {
        after_suffix
    } else {
        input
    }
}
371

372
fn string(input: Cursor) -> Result<Cursor, Reject> {
373
    if let Ok(input) = input.parse("\"") {
374
        cooked_string(input)
375
    } else if let Ok(input) = input.parse("r") {
376
        raw_string(input)
377
    } else {
378
        Err(Reject)
379
    }
380
}
381

382
fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
383
    let mut chars = input.char_indices();
384

385
    while let Some((i, ch)) = chars.next() {
386
        match ch {
387
            '"' => {
388
                let input = input.advance(i + 1);
389
                return Ok(literal_suffix(input));
390
            }
391
            '\r' => match chars.next() {
392
                Some((_, '\n')) => {}
393
                _ => break,
394
            },
395
            '\\' => match chars.next() {
396
                Some((_, 'x')) => {
397
                    backslash_x_char(&mut chars)?;
398
                }
399
                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
400
                Some((_, 'u')) => {
401
                    backslash_u(&mut chars)?;
402
                }
403
                Some((newline, ch @ ('\n' | '\r'))) => {
404
                    input = input.advance(newline + 1);
405
                    trailing_backslash(&mut input, ch as u8)?;
406
                    chars = input.char_indices();
407
                }
408
                _ => break,
409
            },
410
            _ch => {}
411
        }
412
    }
413
    Err(Reject)
414
}
415

416
fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
417
    let (input, delimiter) = delimiter_of_raw_string(input)?;
418
    let mut bytes = input.bytes().enumerate();
419
    while let Some((i, byte)) = bytes.next() {
420
        match byte {
421
            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
422
                let rest = input.advance(i + 1 + delimiter.len());
423
                return Ok(literal_suffix(rest));
424
            }
425
            b'\r' => match bytes.next() {
426
                Some((_, b'\n')) => {}
427
                _ => break,
428
            },
429
            _ => {}
430
        }
431
    }
432
    Err(Reject)
433
}
434

435
fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
436
    if let Ok(input) = input.parse("b\"") {
437
        cooked_byte_string(input)
438
    } else if let Ok(input) = input.parse("br") {
439
        raw_byte_string(input)
440
    } else {
441
        Err(Reject)
442
    }
443
}
444

445
fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
446
    let mut bytes = input.bytes().enumerate();
447
    while let Some((offset, b)) = bytes.next() {
448
        match b {
449
            b'"' => {
450
                let input = input.advance(offset + 1);
451
                return Ok(literal_suffix(input));
452
            }
453
            b'\r' => match bytes.next() {
454
                Some((_, b'\n')) => {}
455
                _ => break,
456
            },
457
            b'\\' => match bytes.next() {
458
                Some((_, b'x')) => {
459
                    backslash_x_byte(&mut bytes)?;
460
                }
461
                Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
462
                Some((newline, b @ (b'\n' | b'\r'))) => {
463
                    input = input.advance(newline + 1);
464
                    trailing_backslash(&mut input, b)?;
465
                    bytes = input.bytes().enumerate();
466
                }
467
                _ => break,
468
            },
469
            b if b.is_ascii() => {}
470
            _ => break,
471
        }
472
    }
473
    Err(Reject)
474
}
475

476
fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
477
    for (i, byte) in input.bytes().enumerate() {
478
        match byte {
479
            b'"' => {
480
                if i > 255 {
481
                    // https://github.com/rust-lang/rust/pull/95251
482
                    return Err(Reject);
483
                }
484
                return Ok((input.advance(i + 1), &input.rest[..i]));
485
            }
486
            b'#' => {}
487
            _ => break,
488
        }
489
    }
490
    Err(Reject)
491
}
492

493
fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
494
    let (input, delimiter) = delimiter_of_raw_string(input)?;
495
    let mut bytes = input.bytes().enumerate();
496
    while let Some((i, byte)) = bytes.next() {
497
        match byte {
498
            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
499
                let rest = input.advance(i + 1 + delimiter.len());
500
                return Ok(literal_suffix(rest));
501
            }
502
            b'\r' => match bytes.next() {
503
                Some((_, b'\n')) => {}
504
                _ => break,
505
            },
506
            other => {
507
                if !other.is_ascii() {
508
                    break;
509
                }
510
            }
511
        }
512
    }
513
    Err(Reject)
514
}
515

516
fn c_string(input: Cursor) -> Result<Cursor, Reject> {
517
    if let Ok(input) = input.parse("c\"") {
518
        cooked_c_string(input)
519
    } else if let Ok(input) = input.parse("cr") {
520
        raw_c_string(input)
521
    } else {
522
        Err(Reject)
523
    }
524
}
525

526
fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
527
    let (input, delimiter) = delimiter_of_raw_string(input)?;
528
    let mut bytes = input.bytes().enumerate();
529
    while let Some((i, byte)) = bytes.next() {
530
        match byte {
531
            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
532
                let rest = input.advance(i + 1 + delimiter.len());
533
                return Ok(literal_suffix(rest));
534
            }
535
            b'\r' => match bytes.next() {
536
                Some((_, b'\n')) => {}
537
                _ => break,
538
            },
539
            b'\0' => break,
540
            _ => {}
541
        }
542
    }
543
    Err(Reject)
544
}
545

546
fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
547
    let mut chars = input.char_indices();
548

549
    while let Some((i, ch)) = chars.next() {
550
        match ch {
551
            '"' => {
552
                let input = input.advance(i + 1);
553
                return Ok(literal_suffix(input));
554
            }
555
            '\r' => match chars.next() {
556
                Some((_, '\n')) => {}
557
                _ => break,
558
            },
559
            '\\' => match chars.next() {
560
                Some((_, 'x')) => {
561
                    backslash_x_nonzero(&mut chars)?;
562
                }
563
                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
564
                Some((_, 'u')) => {
565
                    if backslash_u(&mut chars)? == '\0' {
566
                        break;
567
                    }
568
                }
569
                Some((newline, ch @ ('\n' | '\r'))) => {
570
                    input = input.advance(newline + 1);
571
                    trailing_backslash(&mut input, ch as u8)?;
572
                    chars = input.char_indices();
573
                }
574
                _ => break,
575
            },
576
            '\0' => break,
577
            _ch => {}
578
        }
579
    }
580
    Err(Reject)
581
}
582

583
fn byte(input: Cursor) -> Result<Cursor, Reject> {
584
    let input = input.parse("b'")?;
585
    let mut bytes = input.bytes().enumerate();
586
    let ok = match bytes.next().map(|(_, b)| b) {
587
        Some(b'\\') => match bytes.next().map(|(_, b)| b) {
588
            Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
589
            Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
590
            _ => false,
591
        },
592
        b => b.is_some(),
593
    };
594
    if !ok {
595
        return Err(Reject);
596
    }
597
    let (offset, _) = bytes.next().ok_or(Reject)?;
598
    if !input.chars().as_str().is_char_boundary(offset) {
599
        return Err(Reject);
600
    }
601
    let input = input.advance(offset).parse("'")?;
602
    Ok(literal_suffix(input))
603
}
604

605
fn character(input: Cursor) -> Result<Cursor, Reject> {
606
    let input = input.parse("'")?;
607
    let mut chars = input.char_indices();
608
    let ok = match chars.next().map(|(_, ch)| ch) {
609
        Some('\\') => match chars.next().map(|(_, ch)| ch) {
610
            Some('x') => backslash_x_char(&mut chars).is_ok(),
611
            Some('u') => backslash_u(&mut chars).is_ok(),
612
            Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
613
            _ => false,
614
        },
615
        ch => ch.is_some(),
616
    };
617
    if !ok {
618
        return Err(Reject);
619
    }
620
    let (idx, _) = chars.next().ok_or(Reject)?;
621
    let input = input.advance(idx).parse("'")?;
622
    Ok(literal_suffix(input))
623
}
624

625
/// Takes the next `(index, item)` pair from the iterator `$chars` and
/// evaluates to the item when it matches `$pat`.
///
/// When the iterator is exhausted or the item does not match, the macro
/// executes `return Err(Reject)` in the calling function.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) if matches!(ch, $pat) => ch,
            _ => return Err(Reject),
        }
    };
}
636

637
fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
638
where
639
    I: Iterator<Item = (usize, char)>,
640
{
641
    next_ch!(chars @ '0'..='7');
642
    next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
643
    Ok(())
644
}
645

646
fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
647
where
648
    I: Iterator<Item = (usize, u8)>,
649
{
650
    next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
651
    next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
652
    Ok(())
653
}
654

655
fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
656
where
657
    I: Iterator<Item = (usize, char)>,
658
{
659
    let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
660
    let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
661
    if first == '0' && second == '0' {
662
        Err(Reject)
663
    } else {
664
        Ok(())
665
    }
666
}
667

668
fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
669
where
670
    I: Iterator<Item = (usize, char)>,
671
{
672
    next_ch!(chars @ '{');
673
    let mut value = 0;
674
    let mut len = 0;
675
    for (_, ch) in chars {
676
        let digit = match ch {
677
            '0'..='9' => ch as u8 - b'0',
678
            'a'..='f' => 10 + ch as u8 - b'a',
679
            'A'..='F' => 10 + ch as u8 - b'A',
680
            '_' if len > 0 => continue,
681
            '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
682
            _ => break,
683
        };
684
        if len == 6 {
685
            break;
686
        }
687
        value *= 0x10;
688
        value += u32::from(digit);
689
        len += 1;
690
    }
691
    Err(Reject)
692
}
693

694
fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
695
    let mut whitespace = input.bytes().enumerate();
696
    loop {
697
        if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
698
            return Err(Reject);
699
        }
700
        match whitespace.next() {
701
            Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
702
                last = b;
703
            }
704
            Some((offset, _)) => {
705
                *input = input.advance(offset);
706
                return Ok(());
707
            }
708
            None => return Err(Reject),
709
        }
710
    }
711
}
712

713
fn float(input: Cursor) -> Result<Cursor, Reject> {
714
    let mut rest = float_digits(input)?;
715
    if let Some(ch) = rest.chars().next() {
716
        if is_ident_start(ch) {
717
            rest = ident_not_raw(rest)?.0;
718
        }
719
    }
720
    word_break(rest)
721
}
722

723
fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
724
    let mut chars = input.chars().peekable();
725
    match chars.next() {
726
        Some(ch) if '0' <= ch && ch <= '9' => {}
727
        _ => return Err(Reject),
728
    }
729

730
    let mut len = 1;
731
    let mut has_dot = false;
732
    let mut has_exp = false;
733
    while let Some(&ch) = chars.peek() {
734
        match ch {
735
            '0'..='9' | '_' => {
736
                chars.next();
737
                len += 1;
738
            }
739
            '.' => {
740
                if has_dot {
741
                    break;
742
                }
743
                chars.next();
744
                if chars
745
                    .peek()
746
                    .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
747
                {
748
                    return Err(Reject);
749
                }
750
                len += 1;
751
                has_dot = true;
752
            }
753
            'e' | 'E' => {
754
                chars.next();
755
                len += 1;
756
                has_exp = true;
757
                break;
758
            }
759
            _ => break,
760
        }
761
    }
762

763
    if !(has_dot || has_exp) {
764
        return Err(Reject);
765
    }
766

767
    if has_exp {
768
        let token_before_exp = if has_dot {
769
            Ok(input.advance(len - 1))
770
        } else {
771
            Err(Reject)
772
        };
773
        let mut has_sign = false;
774
        let mut has_exp_value = false;
775
        while let Some(&ch) = chars.peek() {
776
            match ch {
777
                '+' | '-' => {
778
                    if has_exp_value {
779
                        break;
780
                    }
781
                    if has_sign {
782
                        return token_before_exp;
783
                    }
784
                    chars.next();
785
                    len += 1;
786
                    has_sign = true;
787
                }
788
                '0'..='9' => {
789
                    chars.next();
790
                    len += 1;
791
                    has_exp_value = true;
792
                }
793
                '_' => {
794
                    chars.next();
795
                    len += 1;
796
                }
797
                _ => break,
798
            }
799
        }
800
        if !has_exp_value {
801
            return token_before_exp;
802
        }
803
    }
804

805
    Ok(input.advance(len))
806
}
807

808
fn int(input: Cursor) -> Result<Cursor, Reject> {
809
    let mut rest = digits(input)?;
810
    if let Some(ch) = rest.chars().next() {
811
        if is_ident_start(ch) {
812
            rest = ident_not_raw(rest)?.0;
813
        }
814
    }
815
    word_break(rest)
816
}
817

818
fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
819
    let base = if input.starts_with("0x") {
820
        input = input.advance(2);
821
        16
822
    } else if input.starts_with("0o") {
823
        input = input.advance(2);
824
        8
825
    } else if input.starts_with("0b") {
826
        input = input.advance(2);
827
        2
828
    } else {
829
        10
830
    };
831

832
    let mut len = 0;
833
    let mut empty = true;
834
    for b in input.bytes() {
835
        match b {
836
            b'0'..=b'9' => {
837
                let digit = (b - b'0') as u64;
838
                if digit >= base {
839
                    return Err(Reject);
840
                }
841
            }
842
            b'a'..=b'f' => {
843
                let digit = 10 + (b - b'a') as u64;
844
                if digit >= base {
845
                    break;
846
                }
847
            }
848
            b'A'..=b'F' => {
849
                let digit = 10 + (b - b'A') as u64;
850
                if digit >= base {
851
                    break;
852
                }
853
            }
854
            b'_' => {
855
                if empty && base == 10 {
856
                    return Err(Reject);
857
                }
858
                len += 1;
859
                continue;
860
            }
861
            _ => break,
862
        }
863
        len += 1;
864
        empty = false;
865
    }
866
    if empty {
867
        Err(Reject)
868
    } else {
869
        Ok(input.advance(len))
870
    }
871
}
872

873
fn punct(input: Cursor) -> PResult<Punct> {
874
    let (rest, ch) = punct_char(input)?;
875
    if ch == '\'' {
876
        let (after_lifetime, _ident) = ident_any(rest)?;
877
        if after_lifetime.starts_with_char('\'')
878
            || (after_lifetime.starts_with_char('#') && !rest.starts_with("r#"))
879
        {
880
            Err(Reject)
881
        } else {
882
            Ok((rest, Punct::new('\'', Spacing::Joint)))
883
        }
884
    } else {
885
        let kind = match punct_char(rest) {
886
            Ok(_) => Spacing::Joint,
887
            Err(Reject) => Spacing::Alone,
888
        };
889
        Ok((rest, Punct::new(ch, kind)))
890
    }
891
}
892

893
fn punct_char(input: Cursor) -> PResult<char> {
894
    if input.starts_with("//") || input.starts_with("/*") {
895
        // Do not accept `/` of a comment as a punct.
896
        return Err(Reject);
897
    }
898

899
    let mut chars = input.chars();
900
    let first = match chars.next() {
901
        Some(ch) => ch,
902
        None => {
903
            return Err(Reject);
904
        }
905
    };
906
    let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
907
    if recognized.contains(first) {
908
        Ok((input.advance(first.len_utf8()), first))
909
    } else {
910
        Err(Reject)
911
    }
912
}
913

914
fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
915
    #[cfg(span_locations)]
916
    let lo = input.off;
917
    let (rest, (comment, inner)) = doc_comment_contents(input)?;
918
    let fallback_span = Span {
919
        #[cfg(span_locations)]
920
        lo,
921
        #[cfg(span_locations)]
922
        hi: rest.off,
923
    };
924
    let span = crate::Span::_new_fallback(fallback_span);
925

926
    let mut scan_for_bare_cr = comment;
927
    while let Some(cr) = scan_for_bare_cr.find('\r') {
928
        let rest = &scan_for_bare_cr[cr + 1..];
929
        if !rest.starts_with('\n') {
930
            return Err(Reject);
931
        }
932
        scan_for_bare_cr = rest;
933
    }
934

935
    let mut pound = Punct::new('#', Spacing::Alone);
936
    pound.set_span(span);
937
    trees.push_token_from_parser(TokenTree::Punct(pound));
938

939
    if inner {
940
        let mut bang = Punct::new('!', Spacing::Alone);
941
        bang.set_span(span);
942
        trees.push_token_from_parser(TokenTree::Punct(bang));
943
    }
944

945
    let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
946
    let mut equal = Punct::new('=', Spacing::Alone);
947
    equal.set_span(span);
948
    let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
949
    literal.set_span(span);
950
    let mut bracketed = TokenStreamBuilder::with_capacity(3);
951
    bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
952
    bracketed.push_token_from_parser(TokenTree::Punct(equal));
953
    bracketed.push_token_from_parser(TokenTree::Literal(literal));
954
    let group = Group::new(Delimiter::Bracket, bracketed.build());
955
    let mut group = crate::Group::_new_fallback(group);
956
    group.set_span(span);
957
    trees.push_token_from_parser(TokenTree::Group(group));
958

959
    Ok((rest, ()))
960
}
961

962
fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
963
    if input.starts_with("//!") {
964
        let input = input.advance(3);
965
        let (input, s) = take_until_newline_or_eof(input);
966
        Ok((input, (s, true)))
967
    } else if input.starts_with("/*!") {
968
        let (input, s) = block_comment(input)?;
969
        Ok((input, (&s[3..s.len() - 2], true)))
970
    } else if input.starts_with("///") {
971
        let input = input.advance(3);
972
        if input.starts_with_char('/') {
973
            return Err(Reject);
974
        }
975
        let (input, s) = take_until_newline_or_eof(input);
976
        Ok((input, (s, false)))
977
    } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
978
        let (input, s) = block_comment(input)?;
979
        Ok((input, (&s[3..s.len() - 2], false)))
980
    } else {
981
        Err(Reject)
982
    }
983
}
984

985
fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
986
    let chars = input.char_indices();
987

988
    for (i, ch) in chars {
989
        if ch == '\n' {
990
            return (input.advance(i), &input.rest[..i]);
991
        } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
992
            return (input.advance(i + 1), &input.rest[..i]);
993
        }
994
    }
995

996
    (input.advance(input.len()), input.rest)
997
}
998

999
Product

Resources

Company