Auto merge of #96495 - Dylan-DPC:rollup-9lm4tpp, r=Dylan-DPC

[rust.git] / compiler / rustc_parse / src / parser / mod.rs
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs

index b6f4cd119e00c84b5cde6abfb8605c3d6b404f39..dfe758d0cdf019406876e2f018699dd736c7a774 100644 (file)
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -150,6 +150,11 @@ pub struct Parser<'a> {
      pub current_closure: Option<ClosureSpans>,
  }
  
+// This type is used a lot, e.g. it's cloned when matching many declarative macro rules. Make sure
+// it doesn't unintentionally get bigger.
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+rustc_data_structures::static_assert_size!(Parser<'_>, 328);
+
  /// Stores span information about a closure.
  #[derive(Clone)]
  pub struct ClosureSpans {
@@ -203,7 +208,12 @@ fn drop(&mut self) {
  
  #[derive(Clone)]
  struct TokenCursor {
+    // The current (innermost) frame. `frame` and `stack` could be combined,
+    // but it's faster to have them separately to access `frame` directly
+    // rather than via something like `stack.last().unwrap()` or
+    // `stack[stack.len() - 1]`.
      frame: TokenCursorFrame,
+    // Additional frames that enclose `frame`.
      stack: Vec<TokenCursorFrame>,
      desugar_doc_comments: bool,
      // Counts the number of calls to `{,inlined_}next`.
@@ -234,22 +244,13 @@ struct TokenCursor {
  
  #[derive(Clone)]
  struct TokenCursorFrame {
-    delim: token::DelimToken,
-    span: DelimSpan,
-    open_delim: bool,
+    delim_sp: Option<(DelimToken, DelimSpan)>,
      tree_cursor: tokenstream::Cursor,
-    close_delim: bool,
  }
  
  impl TokenCursorFrame {
-    fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self {
-        TokenCursorFrame {
-            delim,
-            span,
-            open_delim: false,
-            tree_cursor: tts.into_trees(),
-            close_delim: false,
-        }
+    fn new(delim_sp: Option<(DelimToken, DelimSpan)>, tts: TokenStream) -> Self {
+        TokenCursorFrame { delim_sp, tree_cursor: tts.into_trees() }
      }
  }
  
@@ -261,92 +262,88 @@ fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
      /// This always-inlined version should only be used on hot code paths.
      #[inline(always)]
      fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
-        let (token, spacing) = loop {
-            let (tree, spacing) = if !self.frame.open_delim {
-                self.frame.open_delim = true;
-                TokenTree::token(token::OpenDelim(self.frame.delim), self.frame.span.open).into()
-            } else if let Some(tree) = self.frame.tree_cursor.next_with_spacing() {
-                tree
-            } else if !self.frame.close_delim {
-                self.frame.close_delim = true;
-                TokenTree::token(token::CloseDelim(self.frame.delim), self.frame.span.close).into()
+        loop {
+            // FIXME: we currently don't return `NoDelim` open/close delims. To fix #67062 we will
+            // need to, whereupon the `delim != DelimToken::NoDelim` conditions below can be
+            // removed.
+            if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing_ref() {
+                match tree {
+                    &TokenTree::Token(ref token) => match (desugar_doc_comments, token) {
+                        (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
+                            return self.desugar(attr_style, data, span);
+                        }
+                        _ => return (token.clone(), *spacing),
+                    },
+                    &TokenTree::Delimited(sp, delim, ref tts) => {
+                        // The open delimiter (if any) is dealt with immediately below.
+                        let frame = TokenCursorFrame::new(Some((delim, sp)), tts.clone());
+                        self.stack.push(mem::replace(&mut self.frame, frame));
+                        if delim != DelimToken::NoDelim {
+                            return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone);
+                        }
+                        // No open delimiter to return; continue on to the next iteration.
+                    }
+                };
              } else if let Some(frame) = self.stack.pop() {
+                if let Some((delim, span)) = self.frame.delim_sp && delim != DelimToken::NoDelim {
+                    self.frame = frame;
+                    return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone);
+                }
                  self.frame = frame;
-                continue;
+                // No close delimiter to return; continue on to the next iteration.
              } else {
-                (TokenTree::Token(Token::new(token::Eof, DUMMY_SP)), Spacing::Alone)
+                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
+            }
+        }
+    }
+
+    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) {
+        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
+        // required to wrap the text.
+        let mut num_of_hashes = 0;
+        let mut count = 0;
+        for ch in data.as_str().chars() {
+            count = match ch {
+                '"' => 1,
+                '#' if count > 0 => count + 1,
+                _ => 0,
              };
+            num_of_hashes = cmp::max(num_of_hashes, count);
+        }
  
-            match tree {
-                TokenTree::Token(token) => {
-                    break (token, spacing);
-                }
-                TokenTree::Delimited(sp, delim, tts) => {
-                    let frame = TokenCursorFrame::new(sp, delim, tts);
-                    self.stack.push(mem::replace(&mut self.frame, frame));
-                }
-            }
-        };
+        let delim_span = DelimSpan::from_single(span);
+        let body = TokenTree::Delimited(
+            delim_span,
+            token::Bracket,
+            [
+                TokenTree::token(token::Ident(sym::doc, false), span),
+                TokenTree::token(token::Eq, span),
+                TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), span),
+            ]
+            .iter()
+            .cloned()
+            .collect::<TokenStream>(),
+        );
  
-        match (desugar_doc_comments, &token) {
-            (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
-                // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
-                // required to wrap the text.
-                let mut num_of_hashes = 0;
-                let mut count = 0;
-                for ch in data.as_str().chars() {
-                    count = match ch {
-                        '"' => 1,
-                        '#' if count > 0 => count + 1,
-                        _ => 0,
-                    };
-                    num_of_hashes = cmp::max(num_of_hashes, count);
-                }
+        self.stack.push(mem::replace(
+            &mut self.frame,
+            TokenCursorFrame::new(
+                None,
+                if attr_style == AttrStyle::Inner {
+                    [TokenTree::token(token::Pound, span), TokenTree::token(token::Not, span), body]
+                        .iter()
+                        .cloned()
+                        .collect::<TokenStream>()
+                } else {
+                    [TokenTree::token(token::Pound, span), body]
+                        .iter()
+                        .cloned()
+                        .collect::<TokenStream>()
+                },
+            ),
+        ));
  
-                let delim_span = DelimSpan::from_single(span);
-                let body = TokenTree::Delimited(
-                    delim_span,
-                    token::Bracket,
-                    [
-                        TokenTree::token(token::Ident(sym::doc, false), span),
-                        TokenTree::token(token::Eq, span),
-                        TokenTree::token(
-                            TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
-                            span,
-                        ),
-                    ]
-                    .iter()
-                    .cloned()
-                    .collect::<TokenStream>(),
-                );
-
-                self.stack.push(mem::replace(
-                    &mut self.frame,
-                    TokenCursorFrame::new(
-                        delim_span,
-                        token::NoDelim,
-                        if attr_style == AttrStyle::Inner {
-                            [
-                                TokenTree::token(token::Pound, span),
-                                TokenTree::token(token::Not, span),
-                                body,
-                            ]
-                            .iter()
-                            .cloned()
-                            .collect::<TokenStream>()
-                        } else {
-                            [TokenTree::token(token::Pound, span), body]
-                                .iter()
-                                .cloned()
-                                .collect::<TokenStream>()
-                        },
-                    ),
-                ));
-
-                self.next(/* desugar_doc_comments */ false)
-            }
-            _ => (token, spacing),
-        }
+        self.next(/* desugar_doc_comments */ false)
      }
  }
  
@@ -431,10 +428,6 @@ pub fn new(
          desugar_doc_comments: bool,
          subparser_name: Option<&'static str>,
      ) -> Self {
-        let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens);
-        start_frame.open_delim = true;
-        start_frame.close_delim = true;
-
          let mut parser = Parser {
              sess,
              token: Token::dummy(),
@@ -444,7 +437,7 @@ pub fn new(
              restrictions: Restrictions::empty(),
              expected_tokens: Vec::new(),
              token_cursor: TokenCursor {
-                frame: start_frame,
+                frame: TokenCursorFrame::new(None, tokens),
                  stack: Vec::new(),
                  num_next_calls: 0,
                  desugar_doc_comments,
@@ -987,12 +980,6 @@ fn bump_with(&mut self, next: (Token, Spacing)) {
      /// This always-inlined version should only be used on hot code paths.
      #[inline(always)]
      fn inlined_bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) {
-        // Bumping after EOF is a bad sign, usually an infinite loop.
-        if self.prev_token.kind == TokenKind::Eof {
-            let msg = "attempted to bump the parser past EOF (may be stuck in a loop)";
-            self.span_bug(self.token.span, msg);
-        }
-
          // Update the current and previous tokens.
          self.prev_token = mem::replace(&mut self.token, next_token);
          self.token_spacing = next_spacing;
@@ -1003,25 +990,24 @@ fn inlined_bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) {
  
      /// Advance the parser by one token.
      pub fn bump(&mut self) {
-        let fallback_span = self.token.span;
-        loop {
-            let (mut next, spacing) = self.token_cursor.inlined_next(self.desugar_doc_comments);
-            self.token_cursor.num_next_calls += 1;
-            // We've retrieved an token from the underlying
-            // cursor, so we no longer need to worry about
-            // an unglued token. See `break_and_eat` for more details
-            self.token_cursor.break_last_token = false;
-            if next.span.is_dummy() {
-                // Tweak the location for better diagnostics, but keep syntactic context intact.
-                next.span = fallback_span.with_ctxt(next.span.ctxt());
-            }
-            if !matches!(
-                next.kind,
-                token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
-            ) {
-                return self.inlined_bump_with((next, spacing));
-            }
+        // Note: destructuring here would give nicer code, but it was found in #96210 to be slower
+        // than `.0`/`.1` access.
+        let mut next = self.token_cursor.inlined_next(self.desugar_doc_comments);
+        self.token_cursor.num_next_calls += 1;
+        // We've retrieved a token from the underlying
+        // cursor, so we no longer need to worry about
+        // an unglued token. See `break_and_eat` for more details
+        self.token_cursor.break_last_token = false;
+        if next.0.span.is_dummy() {
+            // Tweak the location for better diagnostics, but keep syntactic context intact.
+            let fallback_span = self.token.span;
+            next.0.span = fallback_span.with_ctxt(next.0.span.ctxt());
          }
+        debug_assert!(!matches!(
+            next.0.kind,
+            token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
+        ));
+        self.inlined_bump_with(next)
      }
  
      /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
@@ -1032,7 +1018,7 @@ pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R
          }
  
          let frame = &self.token_cursor.frame;
-        if frame.delim != DelimToken::NoDelim {
+        if let Some((delim, span)) = frame.delim_sp && delim != DelimToken::NoDelim {
              let all_normal = (0..dist).all(|i| {
                  let token = frame.tree_cursor.look_ahead(i);
                  !matches!(token, Some(TokenTree::Delimited(_, DelimToken::NoDelim, _)))
@@ -1045,7 +1031,7 @@ pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R
                              looker(&Token::new(token::OpenDelim(*delim), dspan.open))
                          }
                      },
-                    None => looker(&Token::new(token::CloseDelim(frame.delim), frame.span.close)),
+                    None => looker(&Token::new(token::CloseDelim(delim), span.close)),
                  };
              }
          }
@@ -1202,24 +1188,27 @@ fn parse_or_use_outer_attributes(
      pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
          match self.token.kind {
              token::OpenDelim(..) => {
-                let depth = self.token_cursor.stack.len();
-
-                // We keep advancing the token cursor until we hit
-                // the matching `CloseDelim` token.
-                while !(depth == self.token_cursor.stack.len()
-                    && matches!(self.token.kind, token::CloseDelim(_)))
-                {
+                // Grab the tokens from this frame.
+                let frame = &self.token_cursor.frame;
+                let stream = frame.tree_cursor.stream.clone();
+                let (delim, span) = frame.delim_sp.unwrap();
+
+                // Advance the token cursor through the entire delimited
+                // sequence. After getting the `OpenDelim` we are *within* the
+                // delimited sequence, i.e. at depth `d`. After getting the
+                // matching `CloseDelim` we are *after* the delimited sequence,
+                // i.e. at depth `d - 1`.
+                let target_depth = self.token_cursor.stack.len() - 1;
+                loop {
                      // Advance one token at a time, so `TokenCursor::next()`
                      // can capture these tokens if necessary.
                      self.bump();
+                    if self.token_cursor.stack.len() == target_depth {
+                        debug_assert!(matches!(self.token.kind, token::CloseDelim(_)));
+                        break;
+                    }
                  }
-                // We are still inside the frame corresponding
-                // to the delimited stream we captured, so grab
-                // the tokens from this frame.
-                let frame = &self.token_cursor.frame;
-                let stream = frame.tree_cursor.stream.clone();
-                let span = frame.span;
-                let delim = frame.delim;
+
                  // Consume close delimiter
                  self.bump();
                  TokenTree::Delimited(span, delim, stream)