X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=compiler%2Frustc_parse%2Fsrc%2Fparser%2Fmod.rs;h=f1956fb695bf7ef4c46b738fff61ddd5dd107541;hb=81799cd8fd841e23b52876ae5e22faeb3ad04eb5;hp=792f9d9ccce56476cf602177f83a45d9f46f61f4;hpb=306ba8357fb36212b7d30efb9eb9e41659ac1445;p=rust.git diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 792f9d9ccce..dfe758d0cdf 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -123,8 +123,8 @@ pub struct Parser<'a> { pub capture_cfg: bool, restrictions: Restrictions, expected_tokens: Vec, - // Important: This must only be advanced from `next_tok` - // to ensure that `token_cursor.num_next_calls` is updated properly + // Important: This must only be advanced from `bump` to ensure that + // `token_cursor.num_next_calls` is updated properly. token_cursor: TokenCursor, desugar_doc_comments: bool, /// This field is used to keep track of how many left angle brackets we have seen. This is @@ -150,6 +150,11 @@ pub struct Parser<'a> { pub current_closure: Option, } +// This type is used a lot, e.g. it's cloned when matching many declarative macro rules. Make sure +// it doesn't unintentionally get bigger. +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(Parser<'_>, 328); + /// Stores span information about a closure. #[derive(Clone)] pub struct ClosureSpans { @@ -203,12 +208,15 @@ fn drop(&mut self) { #[derive(Clone)] struct TokenCursor { + // The current (innermost) frame. `frame` and `stack` could be combined, + // but it's faster to have them separately to access `frame` directly + // rather than via something like `stack.last().unwrap()` or + // `stack[stack.len() - 1]`. frame: TokenCursorFrame, + // Additional frames that enclose `frame`. stack: Vec, desugar_doc_comments: bool, - // Counts the number of calls to `{,inlined_}next` or - // `{,inlined_}next_desugared`, depending on whether - // `desugar_doc_comments` is set. + // Counts the number of calls to `{,inlined_}next`. num_next_calls: usize, // During parsing, we may sometimes need to 'unglue' a // glued token into two component tokens @@ -236,75 +244,60 @@ struct TokenCursor { #[derive(Clone)] struct TokenCursorFrame { - delim: token::DelimToken, - span: DelimSpan, - open_delim: bool, + delim_sp: Option<(DelimToken, DelimSpan)>, tree_cursor: tokenstream::Cursor, - close_delim: bool, } impl TokenCursorFrame { - fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self { - TokenCursorFrame { - delim, - span, - open_delim: false, - tree_cursor: tts.into_trees(), - close_delim: false, - } + fn new(delim_sp: Option<(DelimToken, DelimSpan)>, tts: TokenStream) -> Self { + TokenCursorFrame { delim_sp, tree_cursor: tts.into_trees() } } } impl TokenCursor { - fn next(&mut self) -> (Token, Spacing) { - self.inlined_next() + fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { + self.inlined_next(desugar_doc_comments) } /// This always-inlined version should only be used on hot code paths. #[inline(always)] - fn inlined_next(&mut self) -> (Token, Spacing) { + fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { loop { - let (tree, spacing) = if !self.frame.open_delim { - self.frame.open_delim = true; - TokenTree::open_tt(self.frame.span, self.frame.delim).into() - } else if let Some(tree) = self.frame.tree_cursor.next_with_spacing() { - tree - } else if !self.frame.close_delim { - self.frame.close_delim = true; - TokenTree::close_tt(self.frame.span, self.frame.delim).into() + // FIXME: we currently don't return `NoDelim` open/close delims. To fix #67062 we will + // need to, whereupon the `delim != DelimToken::NoDelim` conditions below can be + // removed. + if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing_ref() { + match tree { + &TokenTree::Token(ref token) => match (desugar_doc_comments, token) { + (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { + return self.desugar(attr_style, data, span); + } + _ => return (token.clone(), *spacing), + }, + &TokenTree::Delimited(sp, delim, ref tts) => { + // Set `open_delim` to true here because we deal with it immediately. + let frame = TokenCursorFrame::new(Some((delim, sp)), tts.clone()); + self.stack.push(mem::replace(&mut self.frame, frame)); + if delim != DelimToken::NoDelim { + return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone); + } + // No open delimeter to return; continue on to the next iteration. + } + }; } else if let Some(frame) = self.stack.pop() { + if let Some((delim, span)) = self.frame.delim_sp && delim != DelimToken::NoDelim { + self.frame = frame; + return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone); + } self.frame = frame; - continue; + // No close delimiter to return; continue on to the next iteration. } else { - (TokenTree::Token(Token::new(token::Eof, DUMMY_SP)), Spacing::Alone) - }; - - match tree { - TokenTree::Token(token) => { - return (token, spacing); - } - TokenTree::Delimited(sp, delim, tts) => { - let frame = TokenCursorFrame::new(sp, delim, tts); - self.stack.push(mem::replace(&mut self.frame, frame)); - } + return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); } } } - fn next_desugared(&mut self) -> (Token, Spacing) { - self.inlined_next_desugared() - } - - /// This always-inlined version should only be used on hot code paths. - #[inline(always)] - fn inlined_next_desugared(&mut self) -> (Token, Spacing) { - let (data, attr_style, sp) = match self.inlined_next() { - (Token { kind: token::DocComment(_, attr_style, data), span }, _) => { - (data, attr_style, span) - } - tok => return tok, - }; - + fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) { // Searches for the occurrences of `"#*` and returns the minimum number of `#`s // required to wrap the text. let mut num_of_hashes = 0; @@ -318,14 +311,14 @@ fn inlined_next_desugared(&mut self) -> (Token, Spacing) { num_of_hashes = cmp::max(num_of_hashes, count); } - let delim_span = DelimSpan::from_single(sp); + let delim_span = DelimSpan::from_single(span); let body = TokenTree::Delimited( delim_span, token::Bracket, [ - TokenTree::token(token::Ident(sym::doc, false), sp), - TokenTree::token(token::Eq, sp), - TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), sp), + TokenTree::token(token::Ident(sym::doc, false), span), + TokenTree::token(token::Eq, span), + TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), span), ] .iter() .cloned() @@ -335,15 +328,14 @@ fn inlined_next_desugared(&mut self) -> (Token, Spacing) { self.stack.push(mem::replace( &mut self.frame, TokenCursorFrame::new( - delim_span, - token::NoDelim, + None, if attr_style == AttrStyle::Inner { - [TokenTree::token(token::Pound, sp), TokenTree::token(token::Not, sp), body] + [TokenTree::token(token::Pound, span), TokenTree::token(token::Not, span), body] .iter() .cloned() .collect::() } else { - [TokenTree::token(token::Pound, sp), body] + [TokenTree::token(token::Pound, span), body] .iter() .cloned() .collect::() @@ -351,7 +343,7 @@ fn inlined_next_desugared(&mut self) -> (Token, Spacing) { ), )); - self.next() + self.next(/* desugar_doc_comments */ false) } } @@ -436,10 +428,6 @@ pub fn new( desugar_doc_comments: bool, subparser_name: Option<&'static str>, ) -> Self { - let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens); - start_frame.open_delim = true; - start_frame.close_delim = true; - let mut parser = Parser { sess, token: Token::dummy(), @@ -449,7 +437,7 @@ pub fn new( restrictions: Restrictions::empty(), expected_tokens: Vec::new(), token_cursor: TokenCursor { - frame: start_frame, + frame: TokenCursorFrame::new(None, tokens), stack: Vec::new(), num_next_calls: 0, desugar_doc_comments, @@ -476,33 +464,6 @@ pub fn new( parser } - #[inline] - fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) { - loop { - let (mut next, spacing) = if self.desugar_doc_comments { - self.token_cursor.inlined_next_desugared() - } else { - self.token_cursor.inlined_next() - }; - self.token_cursor.num_next_calls += 1; - // We've retrieved an token from the underlying - // cursor, so we no longer need to worry about - // an unglued token. See `break_and_eat` for more details - self.token_cursor.break_last_token = false; - if next.span.is_dummy() { - // Tweak the location for better diagnostics, but keep syntactic context intact. - next.span = fallback_span.with_ctxt(next.span.ctxt()); - } - if matches!( - next.kind, - token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) - ) { - continue; - } - return (next, spacing); - } - } - pub fn unexpected(&mut self) -> PResult<'a, T> { match self.expect_one_of(&[], &[]) { Err(e) => Err(e), @@ -697,7 +658,7 @@ fn break_and_eat(&mut self, expected: TokenKind) -> bool { // // If we consume any additional tokens, then this token // is not needed (we'll capture the entire 'glued' token), - // and `next_tok` will set this field to `None` + // and `bump` will set this field to `None` self.token_cursor.break_last_token = true; // Use the spacing of the glued token as the spacing // of the unglued second token. @@ -1019,12 +980,6 @@ fn bump_with(&mut self, next: (Token, Spacing)) { /// This always-inlined version should only be used on hot code paths. #[inline(always)] fn inlined_bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) { - // Bumping after EOF is a bad sign, usually an infinite loop. - if self.prev_token.kind == TokenKind::Eof { - let msg = "attempted to bump the parser past EOF (may be stuck in a loop)"; - self.span_bug(self.token.span, msg); - } - // Update the current and previous tokens. self.prev_token = mem::replace(&mut self.token, next_token); self.token_spacing = next_spacing; @@ -1035,8 +990,24 @@ fn inlined_bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) { /// Advance the parser by one token. pub fn bump(&mut self) { - let next_token = self.next_tok(self.token.span); - self.inlined_bump_with(next_token); + // Note: destructuring here would give nicer code, but it was found in #96210 to be slower + // than `.0`/`.1` access. + let mut next = self.token_cursor.inlined_next(self.desugar_doc_comments); + self.token_cursor.num_next_calls += 1; + // We've retrieved an token from the underlying + // cursor, so we no longer need to worry about + // an unglued token. See `break_and_eat` for more details + self.token_cursor.break_last_token = false; + if next.0.span.is_dummy() { + // Tweak the location for better diagnostics, but keep syntactic context intact. + let fallback_span = self.token.span; + next.0.span = fallback_span.with_ctxt(next.0.span.ctxt()); + } + debug_assert!(!matches!( + next.0.kind, + token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) + )); + self.inlined_bump_with(next) } /// Look-ahead `dist` tokens of `self.token` and get access to that token there. @@ -1047,7 +1018,7 @@ pub fn look_ahead(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R } let frame = &self.token_cursor.frame; - if frame.delim != DelimToken::NoDelim { + if let Some((delim, span)) = frame.delim_sp && delim != DelimToken::NoDelim { let all_normal = (0..dist).all(|i| { let token = frame.tree_cursor.look_ahead(i); !matches!(token, Some(TokenTree::Delimited(_, DelimToken::NoDelim, _))) @@ -1060,7 +1031,7 @@ pub fn look_ahead(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R looker(&Token::new(token::OpenDelim(*delim), dspan.open)) } }, - None => looker(&Token::new(token::CloseDelim(frame.delim), frame.span.close)), + None => looker(&Token::new(token::CloseDelim(delim), span.close)), }; } } @@ -1069,7 +1040,7 @@ pub fn look_ahead(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R let mut i = 0; let mut token = Token::dummy(); while i < dist { - token = cursor.next().0; + token = cursor.next(/* desugar_doc_comments */ false).0; if matches!( token.kind, token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) @@ -1125,13 +1096,13 @@ fn parse_const_block(&mut self, span: Span, pat: bool) -> PResult<'a, P> { self.sess.gated_spans.gate(sym::inline_const, span); } self.eat_keyword(kw::Const); - let blk = self.parse_block()?; + let (attrs, blk) = self.parse_inner_attrs_and_block()?; let anon_const = AnonConst { id: DUMMY_NODE_ID, value: self.mk_expr(blk.span, ExprKind::Block(blk, None), AttrVec::new()), }; let blk_span = anon_const.value.span; - Ok(self.mk_expr(span.to(blk_span), ExprKind::ConstBlock(anon_const), AttrVec::new())) + Ok(self.mk_expr(span.to(blk_span), ExprKind::ConstBlock(anon_const), AttrVec::from(attrs))) } /// Parses mutability (`mut` or nothing). @@ -1217,24 +1188,27 @@ fn parse_or_use_outer_attributes( pub(crate) fn parse_token_tree(&mut self) -> TokenTree { match self.token.kind { token::OpenDelim(..) => { - let depth = self.token_cursor.stack.len(); - - // We keep advancing the token cursor until we hit - // the matching `CloseDelim` token. - while !(depth == self.token_cursor.stack.len() - && matches!(self.token.kind, token::CloseDelim(_))) - { + // Grab the tokens from this frame. + let frame = &self.token_cursor.frame; + let stream = frame.tree_cursor.stream.clone(); + let (delim, span) = frame.delim_sp.unwrap(); + + // Advance the token cursor through the entire delimited + // sequence. After getting the `OpenDelim` we are *within* the + // delimited sequence, i.e. at depth `d`. After getting the + // matching `CloseDelim` we are *after* the delimited sequence, + // i.e. at depth `d - 1`. + let target_depth = self.token_cursor.stack.len() - 1; + loop { // Advance one token at a time, so `TokenCursor::next()` // can capture these tokens if necessary. self.bump(); + if self.token_cursor.stack.len() == target_depth { + debug_assert!(matches!(self.token.kind, token::CloseDelim(_))); + break; + } } - // We are still inside the frame corresponding - // to the delimited stream we captured, so grab - // the tokens from this frame. - let frame = &self.token_cursor.frame; - let stream = frame.tree_cursor.stream.clone(); - let span = frame.span; - let delim = frame.delim; + // Consume close delimiter self.bump(); TokenTree::Delimited(span, delim, stream) @@ -1289,7 +1263,7 @@ fn is_crate_vis(&self) -> bool { /// so emit a proper diagnostic. // Public for rustfmt usage. pub fn parse_visibility(&mut self, fbt: FollowedByType) -> PResult<'a, Visibility> { - maybe_whole!(self, NtVis, |x| x); + maybe_whole!(self, NtVis, |x| x.into_inner()); self.expected_tokens.push(TokenType::Keyword(kw::Crate)); if self.is_crate_vis() {