compiler/rustc_parse/src/parser/attr_wrapper.rs

   1 use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor, TrailingToken};
   2 use rustc_ast::token::{self, DelimToken, Token, TokenKind};
   3 use rustc_ast::tokenstream::{AttrAnnotatedTokenStream, AttributesData, CreateTokenStream};
   4 use rustc_ast::tokenstream::{AttrAnnotatedTokenTree, DelimSpan, LazyTokenStream, Spacing};
   5 use rustc_ast::{self as ast};
   6 use rustc_ast::{AstLike, AttrVec, Attribute};
   7 use rustc_errors::PResult;
   8 use rustc_span::{sym, Span, DUMMY_SP};
   9
  10 use std::convert::TryInto;
  11 use std::ops::Range;
  12
  13 /// A wrapper type to ensure that the parser handles outer attributes correctly.
  14 /// When we parse outer attributes, we need to ensure that we capture tokens
  15 /// for the attribute target. This allows us to perform cfg-expansion on
  16 /// a token stream before we invoke a derive proc-macro.
  17 ///
  18 /// This wrapper prevents direct access to the underlying `Vec<ast::Attribute>`.
  19 /// Parsing code can only get access to the underlying attributes
  20 /// by passing an `AttrWrapper` to `collect_tokens_trailing_tokens`.
  21 /// This makes it difficult to accidentally construct an AST node
  22 /// (which stores a `Vec<ast::Attribute>`) without first collecting tokens.
  23 ///
  24 /// This struct has its own module, to ensure that the parser code
  25 /// cannot directly access the `attrs` field
  26 #[derive(Debug, Clone)]
  27 pub struct AttrWrapper {
  28     attrs: AttrVec,
  29     // The start of the outer attributes in the token cursor.
  30     // This allows us to create a `ReplaceRange` for the entire attribute
  31     // target, including outer attributes.
  32     start_pos: usize,
  33 }
  34
  35 // This struct is passed around very frequently,
  36 // so make sure it doesn't accidentally get larger
  37 #[cfg(target_arch = "x86_64")]
  38 rustc_data_structures::static_assert_size!(AttrWrapper, 16);
  39
  40 impl AttrWrapper {
  41     pub(super) fn new(attrs: AttrVec, start_pos: usize) -> AttrWrapper {
  42         AttrWrapper { attrs, start_pos }
  43     }
  44     pub fn empty() -> AttrWrapper {
  45         AttrWrapper { attrs: AttrVec::new(), start_pos: usize::MAX }
  46     }
  47     // FIXME: Delay span bug here?
  48     pub(crate) fn take_for_recovery(self) -> AttrVec {
  49         self.attrs
  50     }
  51
  52     // FIXME: require passing an NT to prevent misuse of this method
  53     pub(crate) fn prepend_to_nt_inner(self, attrs: &mut Vec<Attribute>) {
  54         let mut self_attrs: Vec<_> = self.attrs.into();
  55         std::mem::swap(attrs, &mut self_attrs);
  56         attrs.extend(self_attrs);
  57     }
  58
  59     pub fn is_empty(&self) -> bool {
  60         self.attrs.is_empty()
  61     }
  62
  63     pub fn maybe_needs_tokens(&self) -> bool {
  64         crate::parser::attr::maybe_needs_tokens(&self.attrs)
  65     }
  66 }
  67
  68 /// Returns `true` if `attrs` contains a `cfg` or `cfg_attr` attribute
  69 fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
  70     // NOTE: Builtin attributes like `cfg` and `cfg_attr` cannot be renamed via imports.
  71     // Therefore, the absence of a literal `cfg` or `cfg_attr` guarantees that
  72     // we don't need to do any eager expansion.
  73     attrs.iter().any(|attr| {
  74         attr.ident().map_or(false, |ident| ident.name == sym::cfg || ident.name == sym::cfg_attr)
  75     })
  76 }
  77
  78 // Produces a `TokenStream` on-demand. Using `cursor_snapshot`
  79 // and `num_calls`, we can reconstruct the `TokenStream` seen
  80 // by the callback. This allows us to avoid producing a `TokenStream`
  81 // if it is never needed - for example, a captured `macro_rules!`
  82 // argument that is never passed to a proc macro.
  83 // In practice token stream creation happens rarely compared to
  84 // calls to `collect_tokens` (see some statistics in #78736),
  85 // so we are doing as little up-front work as possible.
  86 //
  87 // This also makes `Parser` very cheap to clone, since
  88 // there is no intermediate collection buffer to clone.
  89 #[derive(Clone)]
  90 struct LazyTokenStreamImpl {
  91     start_token: (Token, Spacing),
  92     cursor_snapshot: TokenCursor,
  93     num_calls: usize,
  94     break_last_token: bool,
  95     replace_ranges: Box<[ReplaceRange]>,
  96 }
  97
  98 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
  99 rustc_data_structures::static_assert_size!(LazyTokenStreamImpl, 144);
 100
 101 impl CreateTokenStream for LazyTokenStreamImpl {
 102     fn create_token_stream(&self) -> AttrAnnotatedTokenStream {
 103         // The token produced by the final call to `next` or `next_desugared`
 104         // was not actually consumed by the callback. The combination
 105         // of chaining the initial token and using `take` produces the desired
 106         // result - we produce an empty `TokenStream` if no calls were made,
 107         // and omit the final token otherwise.
 108         let mut cursor_snapshot = self.cursor_snapshot.clone();
 109         let tokens =
 110             std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
 111                 .chain((0..self.num_calls).map(|_| {
 112                     let token = if cursor_snapshot.desugar_doc_comments {
 113                         cursor_snapshot.next_desugared()
 114                     } else {
 115                         cursor_snapshot.next()
 116                     };
 117                     (FlatToken::Token(token.0), token.1)
 118                 }))
 119                 .take(self.num_calls);
 120
 121         if !self.replace_ranges.is_empty() {
 122             let mut tokens: Vec<_> = tokens.collect();
 123             let mut replace_ranges = self.replace_ranges.clone();
 124             replace_ranges.sort_by_key(|(range, _)| range.start);
 125
 126             #[cfg(debug_assertions)]
 127             {
 128                 for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() {
 129                     assert!(
 130                         range.end <= next_range.start || range.end >= next_range.end,
 131                         "Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
 132                         range,
 133                         tokens,
 134                         next_range,
 135                         next_tokens,
 136                     );
 137                 }
 138             }
 139
 140             // Process the replace ranges, starting from the highest start
 141             // position and working our way back. If have tokens like:
 142             //
 143             // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }`
 144             //
 145             // Then we will generate replace ranges for both
 146             // the `#[cfg(FALSE)] field: bool` and the entire
 147             // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }`
 148             //
 149             // By starting processing from the replace range with the greatest
 150             // start position, we ensure that any replace range which encloses
 151             // another replace range will capture the *replaced* tokens for the inner
 152             // range, not the original tokens.
 153             for (range, new_tokens) in replace_ranges.iter().rev() {
 154                 assert!(!range.is_empty(), "Cannot replace an empty range: {:?}", range);
 155                 // Replace ranges are only allowed to decrease the number of tokens.
 156                 assert!(
 157                     range.len() >= new_tokens.len(),
 158                     "Range {:?} has greater len than {:?}",
 159                     range,
 160                     new_tokens
 161                 );
 162
 163                 // Replace any removed tokens with `FlatToken::Empty`.
 164                 // This keeps the total length of `tokens` constant throughout the
 165                 // replacement process, allowing us to use all of the `ReplaceRanges` entries
 166                 // without adjusting indices.
 167                 let filler = std::iter::repeat((FlatToken::Empty, Spacing::Alone))
 168                     .take(range.len() - new_tokens.len());
 169
 170                 tokens.splice(
 171                     (range.start as usize)..(range.end as usize),
 172                     new_tokens.clone().into_iter().chain(filler),
 173                 );
 174             }
 175             make_token_stream(tokens.into_iter(), self.break_last_token)
 176         } else {
 177             make_token_stream(tokens, self.break_last_token)
 178         }
 179     }
 180 }
 181
 182 impl<'a> Parser<'a> {
 183     /// Records all tokens consumed by the provided callback,
 184     /// including the current token. These tokens are collected
 185     /// into a `LazyTokenStream`, and returned along with the result
 186     /// of the callback.
 187     ///
 188     /// Note: If your callback consumes an opening delimiter
 189     /// (including the case where you call `collect_tokens`
 190     /// when the current token is an opening delimeter),
 191     /// you must also consume the corresponding closing delimiter.
 192     ///
 193     /// That is, you can consume
 194     /// `something ([{ }])` or `([{}])`, but not `([{}]`
 195     ///
 196     /// This restriction shouldn't be an issue in practice,
 197     /// since this function is used to record the tokens for
 198     /// a parsed AST item, which always has matching delimiters.
 199     pub fn collect_tokens_trailing_token<R: AstLike>(
 200         &mut self,
 201         attrs: AttrWrapper,
 202         force_collect: ForceCollect,
 203         f: impl FnOnce(&mut Self, Vec<ast::Attribute>) -> PResult<'a, (R, TrailingToken)>,
 204     ) -> PResult<'a, R> {
 205         // We only bail out when nothing could possibly observe the collected tokens:
 206         // 1. We cannot be force collecting tokens (since force-collecting requires tokens
 207         //    by definition
 208         if matches!(force_collect, ForceCollect::No)
 209             // None of our outer attributes can require tokens (e.g. a proc-macro)
 210             && !attrs.maybe_needs_tokens()
 211             // If our target supports custom inner attributes, then we cannot bail
 212             // out early, since we may need to capture tokens for a custom inner attribute
 213             // invocation.
 214             && !R::SUPPORTS_CUSTOM_INNER_ATTRS
 215             // Never bail out early in `capture_cfg` mode, since there might be `#[cfg]`
 216             // or `#[cfg_attr]` attributes.
 217             && !self.capture_cfg
 218         {
 219             return Ok(f(self, attrs.attrs.into())?.0);
 220         }
 221
 222         let start_token = (self.token.clone(), self.token_spacing);
 223         let cursor_snapshot = self.token_cursor.clone();
 224
 225         let has_outer_attrs = !attrs.attrs.is_empty();
 226         let prev_capturing = std::mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
 227         let replace_ranges_start = self.capture_state.replace_ranges.len();
 228
 229         let ret = f(self, attrs.attrs.into());
 230
 231         self.capture_state.capturing = prev_capturing;
 232
 233         let (mut ret, trailing) = ret?;
 234
 235         // When we're not in `capture-cfg` mode, then bail out early if:
 236         // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`)
 237         //    so there's nothing for us to do.
 238         // 2. Our target already has tokens set (e.g. we've parsed something
 239         // like `#[my_attr] $item`. The actual parsing code takes care of prepending
 240         // any attributes to the nonterminal, so we don't need to modify the
 241         // already captured tokens.
 242         // Note that this check is independent of `force_collect`- if we already
 243         // have tokens, or can't even store them, then there's never a need to
 244         // force collection of new tokens.
 245         if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
 246             return Ok(ret);
 247         }
 248
 249         // This is very similar to the bail out check at the start of this function.
 250         // Now that we've parsed an AST node, we have more information available.
 251         if matches!(force_collect, ForceCollect::No)
 252             // We now have inner attributes available, so this check is more precise
 253             // than `attrs.maybe_needs_tokens()` at the start of the function.
 254             // As a result, we don't need to check `R::SUPPORTS_CUSTOM_INNER_ATTRS`
 255             && !crate::parser::attr::maybe_needs_tokens(ret.attrs())
 256             // Subtle: We call `has_cfg_or_cfg_attr` with the attrs from `ret`.
 257             // This ensures that we consider inner attributes (e.g. `#![cfg]`),
 258             // which require us to have tokens available
 259             // We also call `has_cfg_or_cfg_attr` at the beginning of this function,
 260             // but we only bail out if there's no possibility of inner attributes
 261             // (!R::SUPPORTS_CUSTOM_INNER_ATTRS)
 262             // We only catpure about `#[cfg]` or `#[cfg_attr]` in `capture_cfg`
 263             // mode - during normal parsing, we don't need any special capturing
 264             // for those attributes, since they're builtin.
 265             && !(self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()))
 266         {
 267             return Ok(ret);
 268         }
 269
 270         let mut inner_attr_replace_ranges = Vec::new();
 271         // Take the captured ranges for any inner attributes that we parsed.
 272         for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) {
 273             if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) {
 274                 inner_attr_replace_ranges.push(attr_range);
 275             } else {
 276                 self.sess
 277                     .span_diagnostic
 278                     .delay_span_bug(inner_attr.span, "Missing token range for attribute");
 279             }
 280         }
 281
 282         let replace_ranges_end = self.capture_state.replace_ranges.len();
 283
 284         let cursor_snapshot_next_calls = cursor_snapshot.num_next_calls;
 285         let mut end_pos = self.token_cursor.num_next_calls;
 286
 287         // Capture a trailing token if requested by the callback 'f'
 288         match trailing {
 289             TrailingToken::None => {}
 290             TrailingToken::Semi => {
 291                 assert_eq!(self.token.kind, token::Semi);
 292                 end_pos += 1;
 293             }
 294             TrailingToken::MaybeComma => {
 295                 if self.token.kind == token::Comma {
 296                     end_pos += 1;
 297                 }
 298             }
 299         }
 300
 301         // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens),
 302         // then extend the range of captured tokens to include it, since the parser
 303         // was not actually bumped past it. When the `LazyTokenStream` gets converted
 304         // into a `AttrAnnotatedTokenStream`, we will create the proper token.
 305         if self.token_cursor.break_last_token {
 306             assert_eq!(
 307                 trailing,
 308                 TrailingToken::None,
 309                 "Cannot set `break_last_token` and have trailing token"
 310             );
 311             end_pos += 1;
 312         }
 313
 314         let num_calls = end_pos - cursor_snapshot_next_calls;
 315
 316         // If we have no attributes, then we will never need to
 317         // use any replace ranges.
 318         let replace_ranges: Box<[ReplaceRange]> = if ret.attrs().is_empty() && !self.capture_cfg {
 319             Box::new([])
 320         } else {
 321             // Grab any replace ranges that occur *inside* the current AST node.
 322             // We will perform the actual replacement when we convert the `LazyTokenStream`
 323             // to a `AttrAnnotatedTokenStream`
 324             let start_calls: u32 = cursor_snapshot_next_calls.try_into().unwrap();
 325             self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]
 326                 .iter()
 327                 .cloned()
 328                 .chain(inner_attr_replace_ranges.clone().into_iter())
 329                 .map(|(range, tokens)| {
 330                     ((range.start - start_calls)..(range.end - start_calls), tokens)
 331                 })
 332                 .collect()
 333         };
 334
 335         let tokens = LazyTokenStream::new(LazyTokenStreamImpl {
 336             start_token,
 337             num_calls,
 338             cursor_snapshot,
 339             break_last_token: self.token_cursor.break_last_token,
 340             replace_ranges,
 341         });
 342
 343         // If we support tokens at all
 344         if let Some(target_tokens) = ret.tokens_mut() {
 345             if target_tokens.is_none() {
 346                 // Store se our newly captured tokens into the AST node
 347                 *target_tokens = Some(tokens.clone());
 348             }
 349         }
 350
 351         let final_attrs = ret.attrs();
 352
 353         // If `capture_cfg` is set and we're inside a recursive call to
 354         // `collect_tokens_trailing_token`, then we need to register a replace range
 355         // if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
 356         // on the captured token stream.
 357         if self.capture_cfg
 358             && matches!(self.capture_state.capturing, Capturing::Yes)
 359             && has_cfg_or_cfg_attr(&final_attrs)
 360         {
 361             let attr_data = AttributesData { attrs: final_attrs.to_vec().into(), tokens };
 362
 363             // Replace the entire AST node that we just parsed, including attributes,
 364             // with a `FlatToken::AttrTarget`. If this AST node is inside an item
 365             // that has `#[derive]`, then this will allow us to cfg-expand this
 366             // AST node.
 367             let start_pos =
 368                 if has_outer_attrs { attrs.start_pos } else { cursor_snapshot_next_calls };
 369             let new_tokens = vec![(FlatToken::AttrTarget(attr_data), Spacing::Alone)];
 370
 371             assert!(
 372                 !self.token_cursor.break_last_token,
 373                 "Should not have unglued last token with cfg attr"
 374             );
 375             let range: Range<u32> = (start_pos.try_into().unwrap())..(end_pos.try_into().unwrap());
 376             self.capture_state.replace_ranges.push((range, new_tokens));
 377             self.capture_state.replace_ranges.extend(inner_attr_replace_ranges);
 378         }
 379
 380         // Only clear our `replace_ranges` when we're finished capturing entirely.
 381         if matches!(self.capture_state.capturing, Capturing::No) {
 382             self.capture_state.replace_ranges.clear();
 383             // We don't clear `inner_attr_ranges`, as doing so repeatedly
 384             // had a measureable performance impact. Most inner attributes that
 385             // we insert will get removed - when we drop the parser, we'll free
 386             // up the memory used by any attributes that we didn't remove from the map.
 387         }
 388         Ok(ret)
 389     }
 390 }
 391
 392 /// Converts a flattened iterator of tokens (including open and close delimiter tokens)
 393 /// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
 394 /// of open and close delims.
 395 // FIXME(#67062): Currently, we don't parse `None`-delimited groups correctly,
 396 // which can cause us to end up with mismatched `None` delimiters in our
 397 // captured tokens. This function contains several hacks to work around this -
 398 // essentially, we throw away mismatched `None` delimiters when we encounter them.
 399 // Once we properly parse `None` delimiters, they can be captured just like any
 400 // other tokens, and these hacks can be removed.
 401 fn make_token_stream(
 402     mut iter: impl Iterator<Item = (FlatToken, Spacing)>,
 403     break_last_token: bool,
 404 ) -> AttrAnnotatedTokenStream {
 405     #[derive(Debug)]
 406     struct FrameData {
 407         open: Span,
 408         open_delim: DelimToken,
 409         inner: Vec<(AttrAnnotatedTokenTree, Spacing)>,
 410     }
 411     let mut stack =
 412         vec![FrameData { open: DUMMY_SP, open_delim: DelimToken::NoDelim, inner: vec![] }];
 413     let mut token_and_spacing = iter.next();
 414     while let Some((token, spacing)) = token_and_spacing {
 415         match token {
 416             FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => {
 417                 stack.push(FrameData { open: span, open_delim: delim, inner: vec![] });
 418             }
 419             FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => {
 420                 // HACK: If we enconter a mismatched `None` delimiter at the top
 421                 // level, just ignore it.
 422                 if matches!(delim, DelimToken::NoDelim)
 423                     && (stack.len() == 1
 424                         || !matches!(stack.last_mut().unwrap().open_delim, DelimToken::NoDelim))
 425                 {
 426                     token_and_spacing = iter.next();
 427                     continue;
 428                 }
 429                 let frame_data = stack
 430                     .pop()
 431                     .unwrap_or_else(|| panic!("Token stack was empty for token: {:?}", token));
 432
 433                 // HACK: If our current frame has a mismatched opening `None` delimiter,
 434                 // merge our current frame with the one above it. That is, transform
 435                 // `[ { < first second } third ]` into `[ { first second } third ]`
 436                 if !matches!(delim, DelimToken::NoDelim)
 437                     && matches!(frame_data.open_delim, DelimToken::NoDelim)
 438                 {
 439                     stack.last_mut().unwrap().inner.extend(frame_data.inner);
 440                     // Process our closing delimiter again, this time at the previous
 441                     // frame in the stack
 442                     token_and_spacing = Some((token, spacing));
 443                     continue;
 444                 }
 445
 446                 assert_eq!(
 447                     frame_data.open_delim, delim,
 448                     "Mismatched open/close delims: open={:?} close={:?}",
 449                     frame_data.open, span
 450                 );
 451                 let dspan = DelimSpan::from_pair(frame_data.open, span);
 452                 let stream = AttrAnnotatedTokenStream::new(frame_data.inner);
 453                 let delimited = AttrAnnotatedTokenTree::Delimited(dspan, delim, stream);
 454                 stack
 455                     .last_mut()
 456                     .unwrap_or_else(|| {
 457                         panic!("Bottom token frame is missing for token: {:?}", token)
 458                     })
 459                     .inner
 460                     .push((delimited, Spacing::Alone));
 461             }
 462             FlatToken::Token(token) => stack
 463                 .last_mut()
 464                 .expect("Bottom token frame is missing!")
 465                 .inner
 466                 .push((AttrAnnotatedTokenTree::Token(token), spacing)),
 467             FlatToken::AttrTarget(data) => stack
 468                 .last_mut()
 469                 .expect("Bottom token frame is missing!")
 470                 .inner
 471                 .push((AttrAnnotatedTokenTree::Attributes(data), spacing)),
 472             FlatToken::Empty => {}
 473         }
 474         token_and_spacing = iter.next();
 475     }
 476     // HACK: If we don't have a closing `None` delimiter for our last
 477     // frame, merge the frame with the top-level frame. That is,
 478     // turn `< first second` into `first second`
 479     if stack.len() == 2 && stack[1].open_delim == DelimToken::NoDelim {
 480         let temp_buf = stack.pop().unwrap();
 481         stack.last_mut().unwrap().inner.extend(temp_buf.inner);
 482     }
 483     let mut final_buf = stack.pop().expect("Missing final buf!");
 484     if break_last_token {
 485         let (last_token, spacing) = final_buf.inner.pop().unwrap();
 486         if let AttrAnnotatedTokenTree::Token(last_token) = last_token {
 487             let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
 488
 489             // A 'unglued' token is always two ASCII characters
 490             let mut first_span = last_token.span.shrink_to_lo();
 491             first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
 492
 493             final_buf.inner.push((
 494                 AttrAnnotatedTokenTree::Token(Token::new(unglued_first, first_span)),
 495                 spacing,
 496             ));
 497         } else {
 498             panic!("Unexpected last token {:?}", last_token)
 499         }
 500     }
 501     assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
 502     AttrAnnotatedTokenStream::new(final_buf.inner)
 503 }