// SPDX-License-Identifier: Apache-2.0 OR MIT12//! Extensions to the parsing API with niche applicability.34use crate::buffer::Cursor;5use crate::error::Result;6use crate::parse::{inner_unexpected, ParseBuffer, Unexpected};7use proc_macro2::extra::DelimSpan;8use proc_macro2::Delimiter;9use std::cell::Cell;10use std::mem;11use std::rc::Rc;1213/// Extensions to the `ParseStream` API to support speculative parsing.14pub trait Speculative {15/// Advance this parse stream to the position of a forked parse stream.16///17/// This is the opposite operation to [`ParseStream::fork`]. You can fork a18/// parse stream, perform some speculative parsing, then join the original19/// stream to the fork to "commit" the parsing from the fork to the main20/// stream.21///22/// If you can avoid doing this, you should, as it limits the ability to23/// generate useful errors. That said, it is often the only way to parse24/// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem25/// is that when the fork fails to parse an `A`, it's impossible to tell26/// whether that was because of a syntax error and the user meant to provide27/// an `A`, or that the `A`s are finished and it's time to start parsing28/// `B`s. Use with care.29///30/// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by31/// parsing `B*` and removing the leading members of `A` from the32/// repetition, bypassing the need to involve the downsides associated with33/// speculative parsing.34///35/// [`ParseStream::fork`]: ParseBuffer::fork36///37/// # Example38///39/// There has been chatter about the possibility of making the colons in the40/// turbofish syntax like `path::to::<T>` no longer required by accepting41/// `path::to<T>` in expression position. Specifically, according to [RFC42/// 2544], [`PathSegment`] parsing should always try to consume a following43/// `<` token as the start of generic arguments, and reset to the `<` if44/// that fails (e.g. the token is acting as a less-than operator).45///46/// This is the exact kind of parsing behavior which requires the "fork,47/// try, commit" behavior that [`ParseStream::fork`] discourages. With48/// `advance_to`, we can avoid having to parse the speculatively parsed49/// content a second time.50///51/// This change in behavior can be implemented in syn by replacing just the52/// `Parse` implementation for `PathSegment`:53///54/// ```55/// # use syn::ext::IdentExt;56/// use syn::parse::discouraged::Speculative;57/// # use syn::parse::{Parse, ParseStream};58/// # use syn::{Ident, PathArguments, Result, Token};59///60/// pub struct PathSegment {61/// pub ident: Ident,62/// pub arguments: PathArguments,63/// }64/// #65/// # impl<T> From<T> for PathSegment66/// # where67/// # T: Into<Ident>,68/// # {69/// # fn from(ident: T) -> Self {70/// # PathSegment {71/// # ident: ident.into(),72/// # arguments: PathArguments::None,73/// # }74/// # }75/// # }76///77/// impl Parse for PathSegment {78/// fn parse(input: ParseStream) -> Result<Self> {79/// if input.peek(Token![super])80/// || input.peek(Token![self])81/// || input.peek(Token![Self])82/// || input.peek(Token![crate])83/// {84/// let ident = input.call(Ident::parse_any)?;85/// return Ok(PathSegment::from(ident));86/// }87///88/// let ident = input.parse()?;89/// if input.peek(Token![::]) && input.peek3(Token![<]) {90/// return Ok(PathSegment {91/// ident,92/// arguments: PathArguments::AngleBracketed(input.parse()?),93/// });94/// }95/// if input.peek(Token![<]) && !input.peek(Token![<=]) {96/// let fork = input.fork();97/// if let Ok(arguments) = fork.parse() {98/// input.advance_to(&fork);99/// return Ok(PathSegment {100/// ident,101/// arguments: PathArguments::AngleBracketed(arguments),102/// });103/// }104/// }105/// Ok(PathSegment::from(ident))106/// }107/// }108///109/// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();110/// ```111///112/// # Drawbacks113///114/// The main drawback of this style of speculative parsing is in error115/// presentation. Even if the lookahead is the "correct" parse, the error116/// that is shown is that of the "fallback" parse. To use the same example117/// as the turbofish above, take the following unfinished "turbofish":118///119/// ```text120/// let _ = f<&'a fn(), for<'a> serde::>();121/// ```122///123/// If this is parsed as generic arguments, we can provide the error message124///125/// ```text126/// error: expected identifier127/// --> src.rs:L:C128/// |129/// L | let _ = f<&'a fn(), for<'a> serde::>();130/// | ^131/// ```132///133/// but if parsed using the above speculative parsing, it falls back to134/// assuming that the `<` is a less-than when it fails to parse the generic135/// arguments, and tries to interpret the `&'a` as the start of a labelled136/// loop, resulting in the much less helpful error137///138/// ```text139/// error: expected `:`140/// --> src.rs:L:C141/// |142/// L | let _ = f<&'a fn(), for<'a> serde::>();143/// | ^^144/// ```145///146/// This can be mitigated with various heuristics (two examples: show both147/// forks' parse errors, or show the one that consumed more tokens), but148/// when you can control the grammar, sticking to something that can be149/// parsed LL(3) and without the LL(*) speculative parsing this makes150/// possible, displaying reasonable errors becomes much more simple.151///152/// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544153/// [`PathSegment`]: crate::PathSegment154///155/// # Performance156///157/// This method performs a cheap fixed amount of work that does not depend158/// on how far apart the two streams are positioned.159///160/// # Panics161///162/// The forked stream in the argument of `advance_to` must have been163/// obtained by forking `self`. Attempting to advance to any other stream164/// will cause a panic.165fn advance_to(&self, fork: &Self);166}167168impl<'a> Speculative for ParseBuffer<'a> {169fn advance_to(&self, fork: &Self) {170if !crate::buffer::same_scope(self.cursor(), fork.cursor()) {171panic!("fork was not derived from the advancing parse stream");172}173174let (self_unexp, self_sp) = inner_unexpected(self);175let (fork_unexp, fork_sp) = inner_unexpected(fork);176if !Rc::ptr_eq(&self_unexp, &fork_unexp) {177match (fork_sp, self_sp) {178// Unexpected set on the fork, but not on `self`, copy it over.179(Some((span, delimiter)), None) => {180self_unexp.set(Unexpected::Some(span, delimiter));181}182// Unexpected unset. Use chain to propagate errors from fork.183(None, None) => {184fork_unexp.set(Unexpected::Chain(self_unexp));185186// Ensure toplevel 'unexpected' tokens from the fork don't187// propagate up the chain by replacing the root `unexpected`188// pointer, only 'unexpected' tokens from existing group189// parsers should propagate.190fork.unexpected191.set(Some(Rc::new(Cell::new(Unexpected::None))));192}193// Unexpected has been set on `self`. No changes needed.194(_, Some(_)) => {}195}196}197198// See comment on `cell` in the struct definition.199self.cell200.set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) });201}202}203204/// Extensions to the `ParseStream` API to support manipulating invisible205/// delimiters the same as if they were visible.206pub trait AnyDelimiter {207/// Returns the delimiter, the span of the delimiter token, and the nested208/// contents for further parsing.209fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>;210}211212impl<'a> AnyDelimiter for ParseBuffer<'a> {213fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> {214self.step(|cursor| {215if let Some((content, delimiter, span, rest)) = cursor.any_group() {216let scope = span.close();217let nested = crate::parse::advance_step_cursor(cursor, content);218let unexpected = crate::parse::get_unexpected(self);219let content = crate::parse::new_parse_buffer(scope, nested, unexpected);220Ok(((delimiter, span, content), rest))221} else {222Err(cursor.error("expected any delimiter"))223}224})225}226}227228229