// GitHub repository: torvalds/linux
// Path: blob/master/rust/proc-macro2/parse.rs
// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3
use crate::fallback::{
4
self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream,
5
TokenStreamBuilder,
6
};
7
use crate::{Delimiter, Punct, Spacing, TokenTree};
8
use core::char;
9
use core::str::{Bytes, CharIndices, Chars};
10
11
/// A position in the source text being lexed.
///
/// `rest` is the not-yet-consumed tail of the input. When built with
/// `cfg(span_locations)`, `off` is advanced by the number of `char`s consumed
/// so that spans can be reported.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    pub(crate) rest: &'a str,
    #[cfg(span_locations)]
    pub(crate) off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor moved forward by `bytes` bytes.
    ///
    /// # Panics
    ///
    /// Panics (via `str::split_at`) if `bytes` is past the end of `rest` or
    /// does not fall on a UTF-8 character boundary.
    pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
        let (_front, rest) = self.rest.split_at(bytes);
        Cursor {
            rest,
            // Offsets are counted in chars, not bytes.
            #[cfg(span_locations)]
            off: self.off + _front.chars().count() as u32,
        }
    }

    /// Returns `true` if the remaining input begins with `s`.
    pub(crate) fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Returns `true` if the remaining input begins with the char `ch`.
    pub(crate) fn starts_with_char(&self, ch: char) -> bool {
        self.rest.starts_with(ch)
    }

    /// Returns `true` if the first remaining char satisfies the predicate `f`.
    pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
    where
        Pattern: FnMut(char) -> bool,
    {
        self.rest.starts_with(f)
    }

    /// Returns `true` if no input remains.
    pub(crate) fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Length of the remaining input in bytes.
    fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input as a byte slice.
    fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the chars of the remaining input.
    fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over `(byte offset, char)` pairs of the remaining input.
    fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }

    /// Consumes the literal text `tag` if the input starts with it,
    /// otherwise rejects without consuming anything.
    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
        if self.starts_with(tag) {
            Ok(self.advance(tag.len()))
        } else {
            Err(Reject)
        }
    }
}

/// Marker error: the input at the cursor did not match what a parser expected.
pub(crate) struct Reject;

/// Result of a sub-parser: the cursor after the match plus the parsed value.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;

fn skip_whitespace(input: Cursor) -> Cursor {
80
let mut s = input;
81
82
while !s.is_empty() {
83
let byte = s.as_bytes()[0];
84
if byte == b'/' {
85
if s.starts_with("//")
86
&& (!s.starts_with("///") || s.starts_with("////"))
87
&& !s.starts_with("//!")
88
{
89
let (cursor, _) = take_until_newline_or_eof(s);
90
s = cursor;
91
continue;
92
} else if s.starts_with("/**/") {
93
s = s.advance(4);
94
continue;
95
} else if s.starts_with("/*")
96
&& (!s.starts_with("/**") || s.starts_with("/***"))
97
&& !s.starts_with("/*!")
98
{
99
match block_comment(s) {
100
Ok((rest, _)) => {
101
s = rest;
102
continue;
103
}
104
Err(Reject) => return s,
105
}
106
}
107
}
108
match byte {
109
b' ' | 0x09..=0x0d => {
110
s = s.advance(1);
111
continue;
112
}
113
b if b.is_ascii() => {}
114
_ => {
115
let ch = s.chars().next().unwrap();
116
if is_whitespace(ch) {
117
s = s.advance(ch.len_utf8());
118
continue;
119
}
120
}
121
}
122
return s;
123
}
124
s
125
}
126
127
fn block_comment(input: Cursor) -> PResult<&str> {
128
if !input.starts_with("/*") {
129
return Err(Reject);
130
}
131
132
let mut depth = 0usize;
133
let bytes = input.as_bytes();
134
let mut i = 0usize;
135
let upper = bytes.len() - 1;
136
137
while i < upper {
138
if bytes[i] == b'/' && bytes[i + 1] == b'*' {
139
depth += 1;
140
i += 1; // eat '*'
141
} else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
142
depth -= 1;
143
if depth == 0 {
144
return Ok((input.advance(i + 2), &input.rest[..i + 2]));
145
}
146
i += 1; // eat '/'
147
}
148
i += 1;
149
}
150
151
Err(Reject)
152
}
153
154
/// Returns `true` for characters the lexer treats as whitespace: everything
/// `char::is_whitespace` accepts, plus U+200E and U+200F.
fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace
    ch.is_whitespace() || matches!(ch, '\u{200e}' | '\u{200f}')
}

fn word_break(input: Cursor) -> Result<Cursor, Reject> {
160
match input.chars().next() {
161
Some(ch) if is_ident_continue(ch) => Err(Reject),
162
Some(_) | None => Ok(input),
163
}
164
}
165
166
// Rustc's representation of a macro expansion error in expression position or
// type position.
const ERROR: &str = "(/*ERROR*/)";

pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
171
let mut trees = TokenStreamBuilder::new();
172
let mut stack = Vec::new();
173
174
loop {
175
input = skip_whitespace(input);
176
177
if let Ok((rest, ())) = doc_comment(input, &mut trees) {
178
input = rest;
179
continue;
180
}
181
182
#[cfg(span_locations)]
183
let lo = input.off;
184
185
let first = match input.bytes().next() {
186
Some(first) => first,
187
None => match stack.last() {
188
None => return Ok(trees.build()),
189
#[cfg(span_locations)]
190
Some((lo, _frame)) => {
191
return Err(LexError {
192
span: Span { lo: *lo, hi: *lo },
193
})
194
}
195
#[cfg(not(span_locations))]
196
Some(_frame) => return Err(LexError { span: Span {} }),
197
},
198
};
199
200
if let Some(open_delimiter) = match first {
201
b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
202
b'[' => Some(Delimiter::Bracket),
203
b'{' => Some(Delimiter::Brace),
204
_ => None,
205
} {
206
input = input.advance(1);
207
let frame = (open_delimiter, trees);
208
#[cfg(span_locations)]
209
let frame = (lo, frame);
210
stack.push(frame);
211
trees = TokenStreamBuilder::new();
212
} else if let Some(close_delimiter) = match first {
213
b')' => Some(Delimiter::Parenthesis),
214
b']' => Some(Delimiter::Bracket),
215
b'}' => Some(Delimiter::Brace),
216
_ => None,
217
} {
218
let frame = match stack.pop() {
219
Some(frame) => frame,
220
None => return Err(lex_error(input)),
221
};
222
#[cfg(span_locations)]
223
let (lo, frame) = frame;
224
let (open_delimiter, outer) = frame;
225
if open_delimiter != close_delimiter {
226
return Err(lex_error(input));
227
}
228
input = input.advance(1);
229
let mut g = Group::new(open_delimiter, trees.build());
230
g.set_span(Span {
231
#[cfg(span_locations)]
232
lo,
233
#[cfg(span_locations)]
234
hi: input.off,
235
});
236
trees = outer;
237
trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
238
} else {
239
let (rest, mut tt) = match leaf_token(input) {
240
Ok((rest, tt)) => (rest, tt),
241
Err(Reject) => return Err(lex_error(input)),
242
};
243
tt.set_span(crate::Span::_new_fallback(Span {
244
#[cfg(span_locations)]
245
lo,
246
#[cfg(span_locations)]
247
hi: rest.off,
248
}));
249
trees.push_token_from_parser(tt);
250
input = rest;
251
}
252
}
253
}
254
255
fn lex_error(cursor: Cursor) -> LexError {
256
#[cfg(not(span_locations))]
257
let _ = cursor;
258
LexError {
259
span: Span {
260
#[cfg(span_locations)]
261
lo: cursor.off,
262
#[cfg(span_locations)]
263
hi: cursor.off,
264
},
265
}
266
}
267
268
fn leaf_token(input: Cursor) -> PResult<TokenTree> {
269
if let Ok((input, l)) = literal(input) {
270
// must be parsed before ident
271
Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
272
} else if let Ok((input, p)) = punct(input) {
273
Ok((input, TokenTree::Punct(p)))
274
} else if let Ok((input, i)) = ident(input) {
275
Ok((input, TokenTree::Ident(i)))
276
} else if input.starts_with(ERROR) {
277
let rest = input.advance(ERROR.len());
278
let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
279
Ok((rest, TokenTree::Literal(repr)))
280
} else {
281
Err(Reject)
282
}
283
}
284
285
fn ident(input: Cursor) -> PResult<crate::Ident> {
286
if [
287
"r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
288
]
289
.iter()
290
.any(|prefix| input.starts_with(prefix))
291
{
292
Err(Reject)
293
} else {
294
ident_any(input)
295
}
296
}
297
298
fn ident_any(input: Cursor) -> PResult<crate::Ident> {
299
let raw = input.starts_with("r#");
300
let rest = input.advance((raw as usize) << 1);
301
302
let (rest, sym) = ident_not_raw(rest)?;
303
304
if !raw {
305
let ident =
306
crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
307
return Ok((rest, ident));
308
}
309
310
match sym {
311
"_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
312
_ => {}
313
}
314
315
let ident =
316
crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
317
Ok((rest, ident))
318
}
319
320
fn ident_not_raw(input: Cursor) -> PResult<&str> {
321
let mut chars = input.char_indices();
322
323
match chars.next() {
324
Some((_, ch)) if is_ident_start(ch) => {}
325
_ => return Err(Reject),
326
}
327
328
let mut end = input.len();
329
for (i, ch) in chars {
330
if !is_ident_continue(ch) {
331
end = i;
332
break;
333
}
334
}
335
336
Ok((input.advance(end), &input.rest[..end]))
337
}
338
339
pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
340
let rest = literal_nocapture(input)?;
341
let end = input.len() - rest.len();
342
Ok((rest, Literal::_new(input.rest[..end].to_string())))
343
}
344
345
fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
346
if let Ok(ok) = string(input) {
347
Ok(ok)
348
} else if let Ok(ok) = byte_string(input) {
349
Ok(ok)
350
} else if let Ok(ok) = c_string(input) {
351
Ok(ok)
352
} else if let Ok(ok) = byte(input) {
353
Ok(ok)
354
} else if let Ok(ok) = character(input) {
355
Ok(ok)
356
} else if let Ok(ok) = float(input) {
357
Ok(ok)
358
} else if let Ok(ok) = int(input) {
359
Ok(ok)
360
} else {
361
Err(Reject)
362
}
363
}
364
365
fn literal_suffix(input: Cursor) -> Cursor {
366
match ident_not_raw(input) {
367
Ok((input, _)) => input,
368
Err(Reject) => input,
369
}
370
}
371
372
fn string(input: Cursor) -> Result<Cursor, Reject> {
373
if let Ok(input) = input.parse("\"") {
374
cooked_string(input)
375
} else if let Ok(input) = input.parse("r") {
376
raw_string(input)
377
} else {
378
Err(Reject)
379
}
380
}
381
382
fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
383
let mut chars = input.char_indices();
384
385
while let Some((i, ch)) = chars.next() {
386
match ch {
387
'"' => {
388
let input = input.advance(i + 1);
389
return Ok(literal_suffix(input));
390
}
391
'\r' => match chars.next() {
392
Some((_, '\n')) => {}
393
_ => break,
394
},
395
'\\' => match chars.next() {
396
Some((_, 'x')) => {
397
backslash_x_char(&mut chars)?;
398
}
399
Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
400
Some((_, 'u')) => {
401
backslash_u(&mut chars)?;
402
}
403
Some((newline, ch @ ('\n' | '\r'))) => {
404
input = input.advance(newline + 1);
405
trailing_backslash(&mut input, ch as u8)?;
406
chars = input.char_indices();
407
}
408
_ => break,
409
},
410
_ch => {}
411
}
412
}
413
Err(Reject)
414
}
415
416
fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
417
let (input, delimiter) = delimiter_of_raw_string(input)?;
418
let mut bytes = input.bytes().enumerate();
419
while let Some((i, byte)) = bytes.next() {
420
match byte {
421
b'"' if input.rest[i + 1..].starts_with(delimiter) => {
422
let rest = input.advance(i + 1 + delimiter.len());
423
return Ok(literal_suffix(rest));
424
}
425
b'\r' => match bytes.next() {
426
Some((_, b'\n')) => {}
427
_ => break,
428
},
429
_ => {}
430
}
431
}
432
Err(Reject)
433
}
434
435
fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
436
if let Ok(input) = input.parse("b\"") {
437
cooked_byte_string(input)
438
} else if let Ok(input) = input.parse("br") {
439
raw_byte_string(input)
440
} else {
441
Err(Reject)
442
}
443
}
444
445
fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
446
let mut bytes = input.bytes().enumerate();
447
while let Some((offset, b)) = bytes.next() {
448
match b {
449
b'"' => {
450
let input = input.advance(offset + 1);
451
return Ok(literal_suffix(input));
452
}
453
b'\r' => match bytes.next() {
454
Some((_, b'\n')) => {}
455
_ => break,
456
},
457
b'\\' => match bytes.next() {
458
Some((_, b'x')) => {
459
backslash_x_byte(&mut bytes)?;
460
}
461
Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
462
Some((newline, b @ (b'\n' | b'\r'))) => {
463
input = input.advance(newline + 1);
464
trailing_backslash(&mut input, b)?;
465
bytes = input.bytes().enumerate();
466
}
467
_ => break,
468
},
469
b if b.is_ascii() => {}
470
_ => break,
471
}
472
}
473
Err(Reject)
474
}
475
476
fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
477
for (i, byte) in input.bytes().enumerate() {
478
match byte {
479
b'"' => {
480
if i > 255 {
481
// https://github.com/rust-lang/rust/pull/95251
482
return Err(Reject);
483
}
484
return Ok((input.advance(i + 1), &input.rest[..i]));
485
}
486
b'#' => {}
487
_ => break,
488
}
489
}
490
Err(Reject)
491
}
492
493
fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
494
let (input, delimiter) = delimiter_of_raw_string(input)?;
495
let mut bytes = input.bytes().enumerate();
496
while let Some((i, byte)) = bytes.next() {
497
match byte {
498
b'"' if input.rest[i + 1..].starts_with(delimiter) => {
499
let rest = input.advance(i + 1 + delimiter.len());
500
return Ok(literal_suffix(rest));
501
}
502
b'\r' => match bytes.next() {
503
Some((_, b'\n')) => {}
504
_ => break,
505
},
506
other => {
507
if !other.is_ascii() {
508
break;
509
}
510
}
511
}
512
}
513
Err(Reject)
514
}
515
516
fn c_string(input: Cursor) -> Result<Cursor, Reject> {
517
if let Ok(input) = input.parse("c\"") {
518
cooked_c_string(input)
519
} else if let Ok(input) = input.parse("cr") {
520
raw_c_string(input)
521
} else {
522
Err(Reject)
523
}
524
}
525
526
fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
527
let (input, delimiter) = delimiter_of_raw_string(input)?;
528
let mut bytes = input.bytes().enumerate();
529
while let Some((i, byte)) = bytes.next() {
530
match byte {
531
b'"' if input.rest[i + 1..].starts_with(delimiter) => {
532
let rest = input.advance(i + 1 + delimiter.len());
533
return Ok(literal_suffix(rest));
534
}
535
b'\r' => match bytes.next() {
536
Some((_, b'\n')) => {}
537
_ => break,
538
},
539
b'\0' => break,
540
_ => {}
541
}
542
}
543
Err(Reject)
544
}
545
546
fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
547
let mut chars = input.char_indices();
548
549
while let Some((i, ch)) = chars.next() {
550
match ch {
551
'"' => {
552
let input = input.advance(i + 1);
553
return Ok(literal_suffix(input));
554
}
555
'\r' => match chars.next() {
556
Some((_, '\n')) => {}
557
_ => break,
558
},
559
'\\' => match chars.next() {
560
Some((_, 'x')) => {
561
backslash_x_nonzero(&mut chars)?;
562
}
563
Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
564
Some((_, 'u')) => {
565
if backslash_u(&mut chars)? == '\0' {
566
break;
567
}
568
}
569
Some((newline, ch @ ('\n' | '\r'))) => {
570
input = input.advance(newline + 1);
571
trailing_backslash(&mut input, ch as u8)?;
572
chars = input.char_indices();
573
}
574
_ => break,
575
},
576
'\0' => break,
577
_ch => {}
578
}
579
}
580
Err(Reject)
581
}
582
583
fn byte(input: Cursor) -> Result<Cursor, Reject> {
584
let input = input.parse("b'")?;
585
let mut bytes = input.bytes().enumerate();
586
let ok = match bytes.next().map(|(_, b)| b) {
587
Some(b'\\') => match bytes.next().map(|(_, b)| b) {
588
Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
589
Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
590
_ => false,
591
},
592
b => b.is_some(),
593
};
594
if !ok {
595
return Err(Reject);
596
}
597
let (offset, _) = bytes.next().ok_or(Reject)?;
598
if !input.chars().as_str().is_char_boundary(offset) {
599
return Err(Reject);
600
}
601
let input = input.advance(offset).parse("'")?;
602
Ok(literal_suffix(input))
603
}
604
605
fn character(input: Cursor) -> Result<Cursor, Reject> {
606
let input = input.parse("'")?;
607
let mut chars = input.char_indices();
608
let ok = match chars.next().map(|(_, ch)| ch) {
609
Some('\\') => match chars.next().map(|(_, ch)| ch) {
610
Some('x') => backslash_x_char(&mut chars).is_ok(),
611
Some('u') => backslash_u(&mut chars).is_ok(),
612
Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
613
_ => false,
614
},
615
ch => ch.is_some(),
616
};
617
if !ok {
618
return Err(Reject);
619
}
620
let (idx, _) = chars.next().ok_or(Reject)?;
621
let input = input.advance(idx).parse("'")?;
622
Ok(literal_suffix(input))
623
}
624
625
/// Pulls the next `(index, item)` pair from the iterator `$chars` and
/// evaluates to the item if it matches `$pat`. Otherwise (no match, or the
/// iterator is exhausted) it returns `Err(Reject)` from the enclosing
/// function.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $pat => ch,
                _ => return Err(Reject),
            },
            None => return Err(Reject),
        }
    };
}

fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
638
where
639
I: Iterator<Item = (usize, char)>,
640
{
641
next_ch!(chars @ '0'..='7');
642
next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
643
Ok(())
644
}
645
646
fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
647
where
648
I: Iterator<Item = (usize, u8)>,
649
{
650
next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
651
next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
652
Ok(())
653
}
654
655
fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
656
where
657
I: Iterator<Item = (usize, char)>,
658
{
659
let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
660
let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
661
if first == '0' && second == '0' {
662
Err(Reject)
663
} else {
664
Ok(())
665
}
666
}
667
668
fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
669
where
670
I: Iterator<Item = (usize, char)>,
671
{
672
next_ch!(chars @ '{');
673
let mut value = 0;
674
let mut len = 0;
675
for (_, ch) in chars {
676
let digit = match ch {
677
'0'..='9' => ch as u8 - b'0',
678
'a'..='f' => 10 + ch as u8 - b'a',
679
'A'..='F' => 10 + ch as u8 - b'A',
680
'_' if len > 0 => continue,
681
'}' if len > 0 => return char::from_u32(value).ok_or(Reject),
682
_ => break,
683
};
684
if len == 6 {
685
break;
686
}
687
value *= 0x10;
688
value += u32::from(digit);
689
len += 1;
690
}
691
Err(Reject)
692
}
693
694
fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
695
let mut whitespace = input.bytes().enumerate();
696
loop {
697
if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
698
return Err(Reject);
699
}
700
match whitespace.next() {
701
Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
702
last = b;
703
}
704
Some((offset, _)) => {
705
*input = input.advance(offset);
706
return Ok(());
707
}
708
None => return Err(Reject),
709
}
710
}
711
}
712
713
fn float(input: Cursor) -> Result<Cursor, Reject> {
714
let mut rest = float_digits(input)?;
715
if let Some(ch) = rest.chars().next() {
716
if is_ident_start(ch) {
717
rest = ident_not_raw(rest)?.0;
718
}
719
}
720
word_break(rest)
721
}
722
723
fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
724
let mut chars = input.chars().peekable();
725
match chars.next() {
726
Some(ch) if '0' <= ch && ch <= '9' => {}
727
_ => return Err(Reject),
728
}
729
730
let mut len = 1;
731
let mut has_dot = false;
732
let mut has_exp = false;
733
while let Some(&ch) = chars.peek() {
734
match ch {
735
'0'..='9' | '_' => {
736
chars.next();
737
len += 1;
738
}
739
'.' => {
740
if has_dot {
741
break;
742
}
743
chars.next();
744
if chars
745
.peek()
746
.map_or(false, |&ch| ch == '.' || is_ident_start(ch))
747
{
748
return Err(Reject);
749
}
750
len += 1;
751
has_dot = true;
752
}
753
'e' | 'E' => {
754
chars.next();
755
len += 1;
756
has_exp = true;
757
break;
758
}
759
_ => break,
760
}
761
}
762
763
if !(has_dot || has_exp) {
764
return Err(Reject);
765
}
766
767
if has_exp {
768
let token_before_exp = if has_dot {
769
Ok(input.advance(len - 1))
770
} else {
771
Err(Reject)
772
};
773
let mut has_sign = false;
774
let mut has_exp_value = false;
775
while let Some(&ch) = chars.peek() {
776
match ch {
777
'+' | '-' => {
778
if has_exp_value {
779
break;
780
}
781
if has_sign {
782
return token_before_exp;
783
}
784
chars.next();
785
len += 1;
786
has_sign = true;
787
}
788
'0'..='9' => {
789
chars.next();
790
len += 1;
791
has_exp_value = true;
792
}
793
'_' => {
794
chars.next();
795
len += 1;
796
}
797
_ => break,
798
}
799
}
800
if !has_exp_value {
801
return token_before_exp;
802
}
803
}
804
805
Ok(input.advance(len))
806
}
807
808
fn int(input: Cursor) -> Result<Cursor, Reject> {
809
let mut rest = digits(input)?;
810
if let Some(ch) = rest.chars().next() {
811
if is_ident_start(ch) {
812
rest = ident_not_raw(rest)?.0;
813
}
814
}
815
word_break(rest)
816
}
817
818
fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
819
let base = if input.starts_with("0x") {
820
input = input.advance(2);
821
16
822
} else if input.starts_with("0o") {
823
input = input.advance(2);
824
8
825
} else if input.starts_with("0b") {
826
input = input.advance(2);
827
2
828
} else {
829
10
830
};
831
832
let mut len = 0;
833
let mut empty = true;
834
for b in input.bytes() {
835
match b {
836
b'0'..=b'9' => {
837
let digit = (b - b'0') as u64;
838
if digit >= base {
839
return Err(Reject);
840
}
841
}
842
b'a'..=b'f' => {
843
let digit = 10 + (b - b'a') as u64;
844
if digit >= base {
845
break;
846
}
847
}
848
b'A'..=b'F' => {
849
let digit = 10 + (b - b'A') as u64;
850
if digit >= base {
851
break;
852
}
853
}
854
b'_' => {
855
if empty && base == 10 {
856
return Err(Reject);
857
}
858
len += 1;
859
continue;
860
}
861
_ => break,
862
}
863
len += 1;
864
empty = false;
865
}
866
if empty {
867
Err(Reject)
868
} else {
869
Ok(input.advance(len))
870
}
871
}
872
873
fn punct(input: Cursor) -> PResult<Punct> {
874
let (rest, ch) = punct_char(input)?;
875
if ch == '\'' {
876
let (after_lifetime, _ident) = ident_any(rest)?;
877
if after_lifetime.starts_with_char('\'')
878
|| (after_lifetime.starts_with_char('#') && !rest.starts_with("r#"))
879
{
880
Err(Reject)
881
} else {
882
Ok((rest, Punct::new('\'', Spacing::Joint)))
883
}
884
} else {
885
let kind = match punct_char(rest) {
886
Ok(_) => Spacing::Joint,
887
Err(Reject) => Spacing::Alone,
888
};
889
Ok((rest, Punct::new(ch, kind)))
890
}
891
}
892
893
fn punct_char(input: Cursor) -> PResult<char> {
894
if input.starts_with("//") || input.starts_with("/*") {
895
// Do not accept `/` of a comment as a punct.
896
return Err(Reject);
897
}
898
899
let mut chars = input.chars();
900
let first = match chars.next() {
901
Some(ch) => ch,
902
None => {
903
return Err(Reject);
904
}
905
};
906
let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
907
if recognized.contains(first) {
908
Ok((input.advance(first.len_utf8()), first))
909
} else {
910
Err(Reject)
911
}
912
}
913
914
fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
915
#[cfg(span_locations)]
916
let lo = input.off;
917
let (rest, (comment, inner)) = doc_comment_contents(input)?;
918
let fallback_span = Span {
919
#[cfg(span_locations)]
920
lo,
921
#[cfg(span_locations)]
922
hi: rest.off,
923
};
924
let span = crate::Span::_new_fallback(fallback_span);
925
926
let mut scan_for_bare_cr = comment;
927
while let Some(cr) = scan_for_bare_cr.find('\r') {
928
let rest = &scan_for_bare_cr[cr + 1..];
929
if !rest.starts_with('\n') {
930
return Err(Reject);
931
}
932
scan_for_bare_cr = rest;
933
}
934
935
let mut pound = Punct::new('#', Spacing::Alone);
936
pound.set_span(span);
937
trees.push_token_from_parser(TokenTree::Punct(pound));
938
939
if inner {
940
let mut bang = Punct::new('!', Spacing::Alone);
941
bang.set_span(span);
942
trees.push_token_from_parser(TokenTree::Punct(bang));
943
}
944
945
let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
946
let mut equal = Punct::new('=', Spacing::Alone);
947
equal.set_span(span);
948
let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
949
literal.set_span(span);
950
let mut bracketed = TokenStreamBuilder::with_capacity(3);
951
bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
952
bracketed.push_token_from_parser(TokenTree::Punct(equal));
953
bracketed.push_token_from_parser(TokenTree::Literal(literal));
954
let group = Group::new(Delimiter::Bracket, bracketed.build());
955
let mut group = crate::Group::_new_fallback(group);
956
group.set_span(span);
957
trees.push_token_from_parser(TokenTree::Group(group));
958
959
Ok((rest, ()))
960
}
961
962
fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
963
if input.starts_with("//!") {
964
let input = input.advance(3);
965
let (input, s) = take_until_newline_or_eof(input);
966
Ok((input, (s, true)))
967
} else if input.starts_with("/*!") {
968
let (input, s) = block_comment(input)?;
969
Ok((input, (&s[3..s.len() - 2], true)))
970
} else if input.starts_with("///") {
971
let input = input.advance(3);
972
if input.starts_with_char('/') {
973
return Err(Reject);
974
}
975
let (input, s) = take_until_newline_or_eof(input);
976
Ok((input, (s, false)))
977
} else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
978
let (input, s) = block_comment(input)?;
979
Ok((input, (&s[3..s.len() - 2], false)))
980
} else {
981
Err(Reject)
982
}
983
}
984
985
fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
986
let chars = input.char_indices();
987
988
for (i, ch) in chars {
989
if ch == '\n' {
990
return (input.advance(i), &input.rest[..i]);
991
} else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
992
return (input.advance(i + 1), &input.rest[..i]);
993
}
994
}
995
996
(input.advance(input.len()), input.rest)
997
}
998
999