X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;ds=sidebyside;f=compiler%2Frustc_parse%2Fsrc%2Fparser%2Fmod.rs;h=dfe758d0cdf019406876e2f018699dd736c7a774;hb=81799cd8fd841e23b52876ae5e22faeb3ad04eb5;hp=b6f4cd119e00c84b5cde6abfb8605c3d6b404f39;hpb=b1e6dee59666d2f85a5121730ec128934519260f;p=rust.git

diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index b6f4cd119e0..dfe758d0cdf 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -150,6 +150,11 @@ pub struct Parser<'a> {
     pub current_closure: Option<ClosureSpans>,
 }
 
+// This type is used a lot, e.g. it's cloned when matching many declarative macro rules. Make sure
+// it doesn't unintentionally get bigger.
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+rustc_data_structures::static_assert_size!(Parser<'_>, 328);
+
 /// Stores span information about a closure.
 #[derive(Clone)]
 pub struct ClosureSpans {
@@ -203,7 +208,12 @@ fn drop(&mut self) {
 
 #[derive(Clone)]
 struct TokenCursor {
+    // The current (innermost) frame. `frame` and `stack` could be combined,
+    // but it's faster to have them separately to access `frame` directly
+    // rather than via something like `stack.last().unwrap()` or
+    // `stack[stack.len() - 1]`.
     frame: TokenCursorFrame,
+    // Additional frames that enclose `frame`.
     stack: Vec<TokenCursorFrame>,
     desugar_doc_comments: bool,
     // Counts the number of calls to `{,inlined_}next`.
@@ -234,22 +244,13 @@ struct TokenCursor {
 
 #[derive(Clone)]
 struct TokenCursorFrame {
-    delim: token::DelimToken,
-    span: DelimSpan,
-    open_delim: bool,
+    delim_sp: Option<(DelimToken, DelimSpan)>,
     tree_cursor: tokenstream::Cursor,
-    close_delim: bool,
 }
 
 impl TokenCursorFrame {
-    fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self {
-        TokenCursorFrame {
-            delim,
-            span,
-            open_delim: false,
-            tree_cursor: tts.into_trees(),
-            close_delim: false,
-        }
+    fn new(delim_sp: Option<(DelimToken, DelimSpan)>, tts: TokenStream) -> Self {
+        TokenCursorFrame { delim_sp, tree_cursor: tts.into_trees() }
     }
 }
 
@@ -261,92 +262,88 @@ fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
     /// This always-inlined version should only be used on hot code paths.
     #[inline(always)]
    fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
-        let (token, spacing) = loop {
-            let (tree, spacing) = if !self.frame.open_delim {
-                self.frame.open_delim = true;
-                TokenTree::token(token::OpenDelim(self.frame.delim), self.frame.span.open).into()
-            } else if let Some(tree) = self.frame.tree_cursor.next_with_spacing() {
-                tree
-            } else if !self.frame.close_delim {
-                self.frame.close_delim = true;
-                TokenTree::token(token::CloseDelim(self.frame.delim), self.frame.span.close).into()
+        loop {
+            // FIXME: we currently don't return `NoDelim` open/close delims. To fix #67062 we will
+            // need to, whereupon the `delim != DelimToken::NoDelim` conditions below can be
+            // removed.
+            if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing_ref() {
+                match tree {
+                    &TokenTree::Token(ref token) => match (desugar_doc_comments, token) {
+                        (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
+                            return self.desugar(attr_style, data, span);
+                        }
+                        _ => return (token.clone(), *spacing),
+                    },
+                    &TokenTree::Delimited(sp, delim, ref tts) => {
+                        // No `open_delim` flag to set here: the open delimiter is handled immediately below.
+                        let frame = TokenCursorFrame::new(Some((delim, sp)), tts.clone());
+                        self.stack.push(mem::replace(&mut self.frame, frame));
+                        if delim != DelimToken::NoDelim {
+                            return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone);
+                        }
+                        // No open delimiter to return; continue on to the next iteration.
+                    }
+                };
             } else if let Some(frame) = self.stack.pop() {
+                if let Some((delim, span)) = self.frame.delim_sp && delim != DelimToken::NoDelim {
+                    self.frame = frame;
+                    return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone);
+                }
                 self.frame = frame;
-                continue;
+                // No close delimiter to return; continue on to the next iteration.
             } else {
-                (TokenTree::Token(Token::new(token::Eof, DUMMY_SP)), Spacing::Alone)
+                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
+            }
+        }
+    }
+
+    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) {
+        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
+        // required to wrap the text.
+        let mut num_of_hashes = 0;
+        let mut count = 0;
+        for ch in data.as_str().chars() {
+            count = match ch {
+                '"' => 1,
+                '#' if count > 0 => count + 1,
+                _ => 0,
+            };
+            num_of_hashes = cmp::max(num_of_hashes, count);
+        }
 
-            match tree {
-                TokenTree::Token(token) => {
-                    break (token, spacing);
-                }
-                TokenTree::Delimited(sp, delim, tts) => {
-                    let frame = TokenCursorFrame::new(sp, delim, tts);
-                    self.stack.push(mem::replace(&mut self.frame, frame));
-                }
-            }
-        };
+        let delim_span = DelimSpan::from_single(span);
+        let body = TokenTree::Delimited(
+            delim_span,
+            token::Bracket,
+            [
+                TokenTree::token(token::Ident(sym::doc, false), span),
+                TokenTree::token(token::Eq, span),
+                TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), span),
+            ]
+            .iter()
+            .cloned()
+            .collect::<TokenStream>(),
+        );
 
-            match (desugar_doc_comments, &token) {
-                (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
-                    // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
-                    // required to wrap the text.
-                    let mut num_of_hashes = 0;
-                    let mut count = 0;
-                    for ch in data.as_str().chars() {
-                        count = match ch {
-                            '"' => 1,
-                            '#' if count > 0 => count + 1,
-                            _ => 0,
-                        };
-                        num_of_hashes = cmp::max(num_of_hashes, count);
-                    }
+        self.stack.push(mem::replace(
+            &mut self.frame,
+            TokenCursorFrame::new(
+                None,
+                if attr_style == AttrStyle::Inner {
+                    [TokenTree::token(token::Pound, span), TokenTree::token(token::Not, span), body]
+                        .iter()
+                        .cloned()
+                        .collect::<TokenStream>()
+                } else {
+                    [TokenTree::token(token::Pound, span), body]
+                        .iter()
+                        .cloned()
+                        .collect::<TokenStream>()
+                },
+            ),
+        ));
 
-                    let delim_span = DelimSpan::from_single(span);
-                    let body = TokenTree::Delimited(
-                        delim_span,
-                        token::Bracket,
-                        [
-                            TokenTree::token(token::Ident(sym::doc, false), span),
-                            TokenTree::token(token::Eq, span),
-                            TokenTree::token(
-                                TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
-                                span,
-                            ),
-                        ]
-                        .iter()
-                        .cloned()
-                        .collect::<TokenStream>(),
-                    );
-
-                    self.stack.push(mem::replace(
-                        &mut self.frame,
-                        TokenCursorFrame::new(
-                            delim_span,
-                            token::NoDelim,
-                            if attr_style == AttrStyle::Inner {
-                                [
-                                    TokenTree::token(token::Pound, span),
-                                    TokenTree::token(token::Not, span),
-                                    body,
-                                ]
-                                .iter()
-                                .cloned()
-                                .collect::<TokenStream>()
-                            } else {
-                                [TokenTree::token(token::Pound, span), body]
-                                .iter()
-                                .cloned()
-                                .collect::<TokenStream>()
-                            },
-                        ),
-                    ));
-
-                    self.next(/* desugar_doc_comments */ false)
-                }
-                _ => (token, spacing),
-            }
+        self.next(/* desugar_doc_comments */ false)
     }
 }
 
@@ -431,10 +428,6 @@ pub fn new(
         desugar_doc_comments: bool,
         subparser_name: Option<&'static str>,
     ) -> Self {
-        let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens);
-        start_frame.open_delim = true;
-        start_frame.close_delim = true;
-
         let mut parser = Parser {
             sess,
             token: Token::dummy(),
@@ -444,7 +437,7 @@ pub fn new(
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
             token_cursor: TokenCursor {
-                frame: start_frame,
+                frame: TokenCursorFrame::new(None, tokens),
                 stack: Vec::new(),
                 num_next_calls: 0,
                 desugar_doc_comments,
@@ -987,12 +980,6 @@ fn bump_with(&mut self, next: (Token, Spacing)) {
     /// This always-inlined version should only be used on hot code paths.
     #[inline(always)]
    fn inlined_bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) {
-        // Bumping after EOF is a bad sign, usually an infinite loop.
-        if self.prev_token.kind == TokenKind::Eof {
-            let msg = "attempted to bump the parser past EOF (may be stuck in a loop)";
-            self.span_bug(self.token.span, msg);
-        }
-
         // Update the current and previous tokens.
         self.prev_token = mem::replace(&mut self.token, next_token);
         self.token_spacing = next_spacing;
@@ -1003,25 +990,24 @@ fn inlined_bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) {
 
     /// Advance the parser by one token.
     pub fn bump(&mut self) {
-        let fallback_span = self.token.span;
-        loop {
-            let (mut next, spacing) = self.token_cursor.inlined_next(self.desugar_doc_comments);
-            self.token_cursor.num_next_calls += 1;
-            // We've retrieved a token from the underlying
-            // cursor, so we no longer need to worry about
-            // an unglued token. See `break_and_eat` for more details
-            self.token_cursor.break_last_token = false;
-            if next.span.is_dummy() {
-                // Tweak the location for better diagnostics, but keep syntactic context intact.
-                next.span = fallback_span.with_ctxt(next.span.ctxt());
-            }
-            if !matches!(
-                next.kind,
-                token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
-            ) {
-                return self.inlined_bump_with((next, spacing));
-            }
+        // Note: destructuring here would give nicer code, but it was found in #96210 to be slower
+        // than `.0`/`.1` access.
+        let mut next = self.token_cursor.inlined_next(self.desugar_doc_comments);
+        self.token_cursor.num_next_calls += 1;
+        // We've retrieved a token from the underlying
+        // cursor, so we no longer need to worry about
+        // an unglued token. See `break_and_eat` for more details
+        self.token_cursor.break_last_token = false;
+        if next.0.span.is_dummy() {
+            // Tweak the location for better diagnostics, but keep syntactic context intact.
+            let fallback_span = self.token.span;
+            next.0.span = fallback_span.with_ctxt(next.0.span.ctxt());
         }
+        debug_assert!(!matches!(
+            next.0.kind,
+            token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
+        ));
+        self.inlined_bump_with(next)
     }
 
     /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
@@ -1032,7 +1018,7 @@ pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R
         }
 
         let frame = &self.token_cursor.frame;
-        if frame.delim != DelimToken::NoDelim {
+        if let Some((delim, span)) = frame.delim_sp && delim != DelimToken::NoDelim {
             let all_normal = (0..dist).all(|i| {
                 let token = frame.tree_cursor.look_ahead(i);
                 !matches!(token, Some(TokenTree::Delimited(_, DelimToken::NoDelim, _)))
@@ -1045,7 +1031,7 @@ pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R
                            looker(&Token::new(token::OpenDelim(*delim), dspan.open))
                        }
                    },
-                    None => looker(&Token::new(token::CloseDelim(frame.delim), frame.span.close)),
+                    None => looker(&Token::new(token::CloseDelim(delim), span.close)),
                };
            }
        }
@@ -1202,24 +1188,27 @@ fn parse_or_use_outer_attributes(
     pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
         match self.token.kind {
             token::OpenDelim(..) => {
-                let depth = self.token_cursor.stack.len();
-
-                // We keep advancing the token cursor until we hit
-                // the matching `CloseDelim` token.
-                while !(depth == self.token_cursor.stack.len()
-                    && matches!(self.token.kind, token::CloseDelim(_)))
-                {
+                // Grab the tokens from this frame.
+                let frame = &self.token_cursor.frame;
+                let stream = frame.tree_cursor.stream.clone();
+                let (delim, span) = frame.delim_sp.unwrap();
+
+                // Advance the token cursor through the entire delimited
+                // sequence. After getting the `OpenDelim` we are *within* the
+                // delimited sequence, i.e. at depth `d`. After getting the
+                // matching `CloseDelim` we are *after* the delimited sequence,
+                // i.e. at depth `d - 1`.
+                let target_depth = self.token_cursor.stack.len() - 1;
+                loop {
                     // Advance one token at a time, so `TokenCursor::next()`
                     // can capture these tokens if necessary.
                     self.bump();
+                    if self.token_cursor.stack.len() == target_depth {
+                        debug_assert!(matches!(self.token.kind, token::CloseDelim(_)));
+                        break;
+                    }
                 }
-                // We are still inside the frame corresponding
-                // to the delimited stream we captured, so grab
-                // the tokens from this frame.
-                let frame = &self.token_cursor.frame;
-                let stream = frame.tree_cursor.stream.clone();
-                let span = frame.span;
-                let delim = frame.delim;
+                // Consume close delimiter
                 self.bump();
                 TokenTree::Delimited(span, delim, stream)
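Editor's note (not part of the patch): the core of the `TokenCursorFrame` change above is that a frame no longer carries `open_delim`/`close_delim` flags. Entering a `Delimited` tree pushes the enclosing frame onto `stack` and yields the open delimiter immediately; exhausting a frame pops the stack and yields the close delimiter. The following self-contained Rust sketch uses made-up types (`Tok`, `Tree`, `Frame`, `Cursor`), not rustc's real `TokenTree`/`TokenCursor`, and omits the `NoDelim` and doc-comment special cases, but it walks nested token trees the same way:

#[derive(Clone, Debug, PartialEq)]
enum Tok {
    Ident(&'static str),
    Open(char),  // delimiter kind, e.g. '(' for parentheses
    Close(char), // same kind as the matching `Open`
    Eof,
}

#[derive(Clone)]
enum Tree {
    Token(Tok),
    Delimited(char, Vec<Tree>),
}

struct Frame {
    delim: Option<char>, // `None` for the outermost stream, like `delim_sp: Option<...>`
    trees: Vec<Tree>,
    index: usize,
}

struct Cursor {
    frame: Frame,      // current (innermost) frame
    stack: Vec<Frame>, // frames that enclose `frame`
}

impl Cursor {
    fn new(trees: Vec<Tree>) -> Self {
        Cursor { frame: Frame { delim: None, trees, index: 0 }, stack: Vec::new() }
    }

    fn next(&mut self) -> Tok {
        if self.frame.index < self.frame.trees.len() {
            let tree = self.frame.trees[self.frame.index].clone();
            self.frame.index += 1;
            match tree {
                Tree::Token(tok) => tok,
                Tree::Delimited(delim, trees) => {
                    // Descend: save the enclosing frame and yield the open delimiter right away.
                    let frame = Frame { delim: Some(delim), trees, index: 0 };
                    self.stack.push(std::mem::replace(&mut self.frame, frame));
                    Tok::Open(delim)
                }
            }
        } else if let Some(frame) = self.stack.pop() {
            // Ascend: yield the close delimiter of the frame we are leaving.
            let delim = self.frame.delim.expect("inner frames always have a delimiter");
            self.frame = frame;
            Tok::Close(delim)
        } else {
            Tok::Eof
        }
    }
}

fn main() {
    // Roughly the token trees of `a ( b ) c`.
    let mut cursor = Cursor::new(vec![
        Tree::Token(Tok::Ident("a")),
        Tree::Delimited('(', vec![Tree::Token(Tok::Ident("b"))]),
        Tree::Token(Tok::Ident("c")),
    ]);
    let mut out = Vec::new();
    loop {
        let tok = cursor.next();
        let done = tok == Tok::Eof;
        out.push(tok);
        if done {
            break;
        }
    }
    assert_eq!(
        out,
        [Tok::Ident("a"), Tok::Open('('), Tok::Ident("b"), Tok::Close('('), Tok::Ident("c"), Tok::Eof]
    );
}

In the real parser the same shape appears as `TokenCursor { frame, stack }` above, with `TokenCursorFrame::new(None, tokens)` as the outermost frame and the extra `NoDelim` and doc-comment handling layered on top.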
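Editor's note on the new `desugar` method: a doc comment `/// text` is replaced by the tokens of `#[doc = r"text"]` (or `#![doc = ...]` for inner comments), with the text emitted as a raw string literal. The small loop over `data` picks the smallest number of `#`s such that the text cannot contain a sequence that would terminate the raw string early. A standalone sketch of just that counting step (the free function `num_of_hashes` here is illustrative, not a rustc API):

use std::cmp;

// Minimum number of `#`s needed so that `data` can be wrapped as a raw string
// literal (`r"..."`, `r#"..."#`, `r##"..."##`, ...) without the body closing it early.
fn num_of_hashes(data: &str) -> u32 {
    let mut num_of_hashes = 0;
    let mut count = 0;
    for ch in data.chars() {
        count = match ch {
            '"' => 1,
            '#' if count > 0 => count + 1,
            _ => 0,
        };
        num_of_hashes = cmp::max(num_of_hashes, count);
    }
    num_of_hashes
}

fn main() {
    assert_eq!(num_of_hashes("plain text"), 0); // `r"plain text"` is fine
    assert_eq!(num_of_hashes(r#"says "hi""#), 1); // needs `r#"..."#`
    assert_eq!(num_of_hashes(r###"ends with "##"###), 3); // needs `r###"..."###`
}

With `num_of_hashes` in hand, the patch builds the bracketed `doc = ...` token tree (`body` above) and pushes it as a new frame (`TokenCursorFrame::new(None, ...)`), so the attribute's tokens are returned to the parser before it continues with the rest of the stream.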