compiler/rustc_parse/src/parser/attr_wrapper.rs

   1 use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor, TrailingToken};
   2 use rustc_ast::token::{self, Delimiter, Token, TokenKind};
   3 use rustc_ast::tokenstream::{AttrTokenStream, AttributesData, ToAttrTokenStream};
   4 use rustc_ast::tokenstream::{AttrTokenTree, DelimSpan, LazyAttrTokenStream, Spacing};
   5 use rustc_ast::{self as ast};
   6 use rustc_ast::{AttrVec, Attribute, HasAttrs, HasTokens};
   7 use rustc_errors::PResult;
   8 use rustc_session::parse::ParseSess;
   9 use rustc_span::{sym, Span, DUMMY_SP};
  10
  11 use std::convert::TryInto;
  12 use std::ops::Range;
  13
/// A wrapper type to ensure that the parser handles outer attributes correctly.
/// When we parse outer attributes, we need to ensure that we capture tokens
/// for the attribute target. This allows us to perform cfg-expansion on
/// a token stream before we invoke a derive proc-macro.
///
/// This wrapper prevents direct access to the underlying `ast::AttrVec`.
/// Parsing code normally gets access to the underlying attributes
/// by passing an `AttrWrapper` to `collect_tokens_trailing_token`.
/// This makes it difficult to accidentally construct an AST node
/// (which stores an `ast::AttrVec`) without first collecting tokens.
///
/// This struct has its own module, to ensure that the parser code
/// cannot directly access the `attrs` field.
#[derive(Debug, Clone)]
pub struct AttrWrapper {
    // The wrapped outer attributes. Kept private so that code outside this
    // module has to go through the methods of `AttrWrapper` to reach them.
    attrs: AttrVec,
    // The start of the outer attributes in the token cursor.
    // This allows us to create a `ReplaceRange` for the entire attribute
    // target, including outer attributes.
    // `AttrWrapper::empty` stores `usize::MAX` here.
    start_pos: usize,
}
  35
  36 impl AttrWrapper {
  37     pub(super) fn new(attrs: AttrVec, start_pos: usize) -> AttrWrapper {
  38         AttrWrapper { attrs, start_pos }
  39     }
  40     pub fn empty() -> AttrWrapper {
  41         AttrWrapper { attrs: AttrVec::new(), start_pos: usize::MAX }
  42     }
  43
  44     pub(crate) fn take_for_recovery(self, sess: &ParseSess) -> AttrVec {
  45         sess.span_diagnostic.delay_span_bug(
  46             self.attrs.get(0).map(|attr| attr.span).unwrap_or(DUMMY_SP),
  47             "AttrVec is taken for recovery but no error is produced",
  48         );
  49
  50         self.attrs
  51     }
  52
  53     /// Prepend `self.attrs` to `attrs`.
  54     // FIXME: require passing an NT to prevent misuse of this method
  55     pub(crate) fn prepend_to_nt_inner(self, attrs: &mut AttrVec) {
  56         let mut self_attrs = self.attrs;
  57         std::mem::swap(attrs, &mut self_attrs);
  58         attrs.extend(self_attrs);
  59     }
  60
  61     pub fn is_empty(&self) -> bool {
  62         self.attrs.is_empty()
  63     }
  64
  65     pub fn maybe_needs_tokens(&self) -> bool {
  66         crate::parser::attr::maybe_needs_tokens(&self.attrs)
  67     }
  68 }
  69
  70 /// Returns `true` if `attrs` contains a `cfg` or `cfg_attr` attribute
  71 fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
  72     // NOTE: Builtin attributes like `cfg` and `cfg_attr` cannot be renamed via imports.
  73     // Therefore, the absence of a literal `cfg` or `cfg_attr` guarantees that
  74     // we don't need to do any eager expansion.
  75     attrs.iter().any(|attr| {
  76         attr.ident().map_or(false, |ident| ident.name == sym::cfg || ident.name == sym::cfg_attr)
  77     })
  78 }
  79
// Produces a `TokenStream` on-demand. Using `cursor_snapshot`
// and `num_calls`, we can reconstruct the `TokenStream` seen
// by the callback. This allows us to avoid producing a `TokenStream`
// if it is never needed - for example, a captured `macro_rules!`
// argument that is never passed to a proc macro.
// In practice token stream creation happens rarely compared to
// calls to `collect_tokens` (see some statistics in #78736),
// so we are doing as little up-front work as possible.
//
// This also makes `Parser` very cheap to clone, since
// there is no intermediate collection buffer to clone.
#[derive(Clone)]
struct LazyAttrTokenStreamImpl {
    // The parser's current token (and its spacing) at the moment capturing
    // started. It is emitted first when the stream is reconstructed.
    start_token: (Token, Spacing),
    // A clone of the parser's token cursor taken when capturing started.
    // `to_attr_token_stream` clones it again and replays tokens from it.
    cursor_snapshot: TokenCursor,
    // The total number of tokens to emit when reconstructing the stream,
    // counting `start_token`.
    num_calls: usize,
    // Copied from the token cursor's `break_last_token` flag at the end of
    // capturing. When set, `make_token_stream` replaces the last token with
    // the first half of the two-token operator it was broken from.
    break_last_token: bool,
    // Replacements to apply to the flat token list before it is turned into
    // a stream. The ranges are relative to the start of the captured tokens.
    replace_ranges: Box<[ReplaceRange]>,
}
  99
 100 impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
 101     fn to_attr_token_stream(&self) -> AttrTokenStream {
 102         // The token produced by the final call to `{,inlined_}next` was not
 103         // actually consumed by the callback. The combination of chaining the
 104         // initial token and using `take` produces the desired result - we
 105         // produce an empty `TokenStream` if no calls were made, and omit the
 106         // final token otherwise.
 107         let mut cursor_snapshot = self.cursor_snapshot.clone();
 108         let tokens =
 109             std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
 110                 .chain((0..self.num_calls).map(|_| {
 111                     let token = cursor_snapshot.next(cursor_snapshot.desugar_doc_comments);
 112                     (FlatToken::Token(token.0), token.1)
 113                 }))
 114                 .take(self.num_calls);
 115
 116         if !self.replace_ranges.is_empty() {
 117             let mut tokens: Vec<_> = tokens.collect();
 118             let mut replace_ranges = self.replace_ranges.to_vec();
 119             replace_ranges.sort_by_key(|(range, _)| range.start);
 120
 121             #[cfg(debug_assertions)]
 122             {
 123                 for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() {
 124                     assert!(
 125                         range.end <= next_range.start || range.end >= next_range.end,
 126                         "Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
 127                         range,
 128                         tokens,
 129                         next_range,
 130                         next_tokens,
 131                     );
 132                 }
 133             }
 134
 135             // Process the replace ranges, starting from the highest start
 136             // position and working our way back. If have tokens like:
 137             //
 138             // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }`
 139             //
 140             // Then we will generate replace ranges for both
 141             // the `#[cfg(FALSE)] field: bool` and the entire
 142             // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }`
 143             //
 144             // By starting processing from the replace range with the greatest
 145             // start position, we ensure that any replace range which encloses
 146             // another replace range will capture the *replaced* tokens for the inner
 147             // range, not the original tokens.
 148             for (range, new_tokens) in replace_ranges.into_iter().rev() {
 149                 assert!(!range.is_empty(), "Cannot replace an empty range: {:?}", range);
 150                 // Replace ranges are only allowed to decrease the number of tokens.
 151                 assert!(
 152                     range.len() >= new_tokens.len(),
 153                     "Range {:?} has greater len than {:?}",
 154                     range,
 155                     new_tokens
 156                 );
 157
 158                 // Replace any removed tokens with `FlatToken::Empty`.
 159                 // This keeps the total length of `tokens` constant throughout the
 160                 // replacement process, allowing us to use all of the `ReplaceRanges` entries
 161                 // without adjusting indices.
 162                 let filler = std::iter::repeat((FlatToken::Empty, Spacing::Alone))
 163                     .take(range.len() - new_tokens.len());
 164
 165                 tokens.splice(
 166                     (range.start as usize)..(range.end as usize),
 167                     new_tokens.into_iter().chain(filler),
 168                 );
 169             }
 170             make_token_stream(tokens.into_iter(), self.break_last_token)
 171         } else {
 172             make_token_stream(tokens, self.break_last_token)
 173         }
 174     }
 175 }
 176
impl<'a> Parser<'a> {
    /// Records all tokens consumed by the provided callback,
    /// including the current token. These tokens are collected
    /// into a `LazyAttrTokenStream`, which is stored into the node returned
    /// by the callback if that node supports tokens and has none yet.
    /// The node itself is returned.
    ///
    /// The callback `f` receives the unwrapped attributes from `attrs` and
    /// returns the parsed node together with a `TrailingToken` describing
    /// which token (if any) directly after the node should be treated as
    /// belonging to it. Errors returned by `f` are propagated.
    ///
    /// Token collection is skipped entirely when the checks at the top of
    /// the function show that nothing could observe the collected tokens.
    ///
    /// Note: If your callback consumes an opening delimiter
    /// (including the case where you call `collect_tokens`
    /// when the current token is an opening delimiter),
    /// you must also consume the corresponding closing delimiter.
    ///
    /// That is, you can consume
    /// `something ([{ }])` or `([{}])`, but not `([{}]`
    ///
    /// This restriction shouldn't be an issue in practice,
    /// since this function is used to record the tokens for
    /// a parsed AST item, which always has matching delimiters.
    pub fn collect_tokens_trailing_token<R: HasAttrs + HasTokens>(
        &mut self,
        attrs: AttrWrapper,
        force_collect: ForceCollect,
        f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, TrailingToken)>,
    ) -> PResult<'a, R> {
        // We only bail out when nothing could possibly observe the collected tokens:
        // We cannot be force collecting tokens (since force-collecting requires tokens
        // by definition).
        if matches!(force_collect, ForceCollect::No)
            // None of our outer attributes can require tokens (e.g. a proc-macro)
            && !attrs.maybe_needs_tokens()
            // If our target supports custom inner attributes, then we cannot bail
            // out early, since we may need to capture tokens for a custom inner attribute
            // invocation.
            && !R::SUPPORTS_CUSTOM_INNER_ATTRS
            // Never bail out early in `capture_cfg` mode, since there might be `#[cfg]`
            // or `#[cfg_attr]` attributes.
            && !self.capture_cfg
        {
            return Ok(f(self, attrs.attrs)?.0);
        }

        // Remember where capturing starts: the current token and a clone of the
        // cursor, from which the token stream can later be replayed lazily.
        let start_token = (self.token.clone(), self.token_spacing);
        let cursor_snapshot = self.token_cursor.clone();

        let has_outer_attrs = !attrs.attrs.is_empty();
        let prev_capturing = std::mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
        let replace_ranges_start = self.capture_state.replace_ranges.len();

        let ret = f(self, attrs.attrs);

        // Restore the previous capturing state *before* propagating a possible
        // error from `f`, so that it is restored on the error path too.
        self.capture_state.capturing = prev_capturing;

        let (mut ret, trailing) = ret?;

        // When we're not in `capture-cfg` mode, then bail out early if:
        // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`)
        //    so there's nothing for us to do.
        // 2. Our target already has tokens set (e.g. we've parsed something
        //    like `#[my_attr] $item`). The actual parsing code takes care of prepending
        //    any attributes to the nonterminal, so we don't need to modify the
        //    already captured tokens.
        // Note that this check is independent of `force_collect` - if we already
        // have tokens, or can't even store them, then there's never a need to
        // force collection of new tokens.
        if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
            return Ok(ret);
        }

        // This is very similar to the bail out check at the start of this function.
        // Now that we've parsed an AST node, we have more information available.
        if matches!(force_collect, ForceCollect::No)
            // We now have inner attributes available, so this check is more precise
            // than `attrs.maybe_needs_tokens()` at the start of the function.
            // As a result, we don't need to check `R::SUPPORTS_CUSTOM_INNER_ATTRS`
            && !crate::parser::attr::maybe_needs_tokens(ret.attrs())
            // Subtle: We call `has_cfg_or_cfg_attr` with the attrs from `ret`.
            // This ensures that we consider inner attributes (e.g. `#![cfg]`),
            // which require us to have tokens available.
            // The bail-out at the beginning of this function never fires in
            // `capture_cfg` mode, so this is the first point at which `cfg` /
            // `cfg_attr` attributes are actually looked at.
            // We only care about `#[cfg]` or `#[cfg_attr]` in `capture_cfg`
            // mode - during normal parsing, we don't need any special capturing
            // for those attributes, since they're builtin.
            && !(self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()))
        {
            return Ok(ret);
        }

        let mut inner_attr_replace_ranges = Vec::new();
        // Take the captured ranges for any inner attributes that we parsed.
        // A missing range is reported as a delayed bug rather than a panic.
        for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) {
            if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) {
                inner_attr_replace_ranges.push(attr_range);
            } else {
                self.sess
                    .span_diagnostic
                    .delay_span_bug(inner_attr.span, "Missing token range for attribute");
            }
        }

        let replace_ranges_end = self.capture_state.replace_ranges.len();

        // `end_pos` starts at the cursor's current call count and is bumped
        // below for each extra token that must be included in the capture.
        let cursor_snapshot_next_calls = cursor_snapshot.num_next_calls;
        let mut end_pos = self.token_cursor.num_next_calls;

        let mut captured_trailing = false;

        // Capture a trailing token if requested by the callback 'f'
        match trailing {
            TrailingToken::None => {}
            // The `>` is only asserted to be the current token; it is not
            // added to the captured range.
            TrailingToken::Gt => {
                assert_eq!(self.token.kind, token::Gt);
            }
            TrailingToken::Semi => {
                assert_eq!(self.token.kind, token::Semi);
                end_pos += 1;
                captured_trailing = true;
            }
            // A comma is captured only if one is actually present.
            TrailingToken::MaybeComma => {
                if self.token.kind == token::Comma {
                    end_pos += 1;
                    captured_trailing = true;
                }
            }
        }

        // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens),
        // then extend the range of captured tokens to include it, since the parser
        // was not actually bumped past it. When the `LazyAttrTokenStream` gets converted
        // into an `AttrTokenStream`, we will create the proper token.
        if self.token_cursor.break_last_token {
            assert!(!captured_trailing, "Cannot set break_last_token and have trailing token");
            end_pos += 1;
        }

        let num_calls = end_pos - cursor_snapshot_next_calls;

        // If we have no attributes, then we will never need to
        // use any replace ranges.
        let replace_ranges: Box<[ReplaceRange]> = if ret.attrs().is_empty() && !self.capture_cfg {
            Box::new([])
        } else {
            // Grab any replace ranges that occur *inside* the current AST node.
            // We will perform the actual replacement when we convert the `LazyAttrTokenStream`
            // to an `AttrTokenStream`.
            // The ranges are shifted so that they are relative to the start of
            // this capture rather than to the cursor's absolute call count.
            let start_calls: u32 = cursor_snapshot_next_calls.try_into().unwrap();
            self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]
                .iter()
                .cloned()
                .chain(inner_attr_replace_ranges.iter().cloned())
                .map(|(range, tokens)| {
                    ((range.start - start_calls)..(range.end - start_calls), tokens)
                })
                .collect()
        };

        let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
            start_token,
            num_calls,
            cursor_snapshot,
            break_last_token: self.token_cursor.break_last_token,
            replace_ranges,
        });

        // If we support tokens at all
        if let Some(target_tokens) = ret.tokens_mut() {
            if target_tokens.is_none() {
                // Store our newly captured tokens into the AST node.
                // `tokens` is cloned because it may also be needed for the
                // `AttributesData` constructed below.
                *target_tokens = Some(tokens.clone());
            }
        }

        let final_attrs = ret.attrs();

        // If `capture_cfg` is set and we're inside a recursive call to
        // `collect_tokens_trailing_token`, then we need to register a replace range
        // if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
        // on the captured token stream.
        if self.capture_cfg
            && matches!(self.capture_state.capturing, Capturing::Yes)
            && has_cfg_or_cfg_attr(final_attrs)
        {
            let attr_data = AttributesData { attrs: final_attrs.iter().cloned().collect(), tokens };

            // Replace the entire AST node that we just parsed, including attributes,
            // with a `FlatToken::AttrTarget`. If this AST node is inside an item
            // that has `#[derive]`, then this will allow us to cfg-expand this
            // AST node.
            // With outer attributes the range starts at the attributes themselves;
            // otherwise it starts where this capture started.
            let start_pos =
                if has_outer_attrs { attrs.start_pos } else { cursor_snapshot_next_calls };
            let new_tokens = vec![(FlatToken::AttrTarget(attr_data), Spacing::Alone)];

            assert!(
                !self.token_cursor.break_last_token,
                "Should not have unglued last token with cfg attr"
            );
            let range: Range<u32> = (start_pos.try_into().unwrap())..(end_pos.try_into().unwrap());
            self.capture_state.replace_ranges.push((range, new_tokens));
            self.capture_state.replace_ranges.extend(inner_attr_replace_ranges);
        }

        // Only clear our `replace_ranges` when we're finished capturing entirely.
        if matches!(self.capture_state.capturing, Capturing::No) {
            self.capture_state.replace_ranges.clear();
            // We don't clear `inner_attr_ranges`, as doing so repeatedly
            // had a measurable performance impact. Most inner attributes that
            // we insert will get removed - when we drop the parser, we'll free
            // up the memory used by any attributes that we didn't remove from the map.
        }
        Ok(ret)
    }
}
 389
 390 /// Converts a flattened iterator of tokens (including open and close delimiter tokens)
 391 /// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
 392 /// of open and close delims.
 393 fn make_token_stream(
 394     mut iter: impl Iterator<Item = (FlatToken, Spacing)>,
 395     break_last_token: bool,
 396 ) -> AttrTokenStream {
 397     #[derive(Debug)]
 398     struct FrameData {
 399         // This is `None` for the first frame, `Some` for all others.
 400         open_delim_sp: Option<(Delimiter, Span)>,
 401         inner: Vec<AttrTokenTree>,
 402     }
 403     let mut stack = vec![FrameData { open_delim_sp: None, inner: vec![] }];
 404     let mut token_and_spacing = iter.next();
 405     while let Some((token, spacing)) = token_and_spacing {
 406         match token {
 407             FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => {
 408                 stack.push(FrameData { open_delim_sp: Some((delim, span)), inner: vec![] });
 409             }
 410             FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => {
 411                 let frame_data = stack
 412                     .pop()
 413                     .unwrap_or_else(|| panic!("Token stack was empty for token: {:?}", token));
 414
 415                 let (open_delim, open_sp) = frame_data.open_delim_sp.unwrap();
 416                 assert_eq!(
 417                     open_delim, delim,
 418                     "Mismatched open/close delims: open={:?} close={:?}",
 419                     open_delim, span
 420                 );
 421                 let dspan = DelimSpan::from_pair(open_sp, span);
 422                 let stream = AttrTokenStream::new(frame_data.inner);
 423                 let delimited = AttrTokenTree::Delimited(dspan, delim, stream);
 424                 stack
 425                     .last_mut()
 426                     .unwrap_or_else(|| {
 427                         panic!("Bottom token frame is missing for token: {:?}", token)
 428                     })
 429                     .inner
 430                     .push(delimited);
 431             }
 432             FlatToken::Token(token) => stack
 433                 .last_mut()
 434                 .expect("Bottom token frame is missing!")
 435                 .inner
 436                 .push(AttrTokenTree::Token(token, spacing)),
 437             FlatToken::AttrTarget(data) => stack
 438                 .last_mut()
 439                 .expect("Bottom token frame is missing!")
 440                 .inner
 441                 .push(AttrTokenTree::Attributes(data)),
 442             FlatToken::Empty => {}
 443         }
 444         token_and_spacing = iter.next();
 445     }
 446     let mut final_buf = stack.pop().expect("Missing final buf!");
 447     if break_last_token {
 448         let last_token = final_buf.inner.pop().unwrap();
 449         if let AttrTokenTree::Token(last_token, spacing) = last_token {
 450             let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
 451
 452             // An 'unglued' token is always two ASCII characters
 453             let mut first_span = last_token.span.shrink_to_lo();
 454             first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
 455
 456             final_buf
 457                 .inner
 458                 .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
 459         } else {
 460             panic!("Unexpected last token {:?}", last_token)
 461         }
 462     }
 463     AttrTokenStream::new(final_buf.inner)
 464 }
 465
// Some types are used a lot. Make sure they don't unintentionally get bigger.
// These checks are only compiled for x86_64 targets with 64-bit pointers
// (see the `cfg` below); presumably the expected sizes are specific to that
// layout.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
mod size_asserts {
    use super::*;
    use rustc_data_structures::static_assert_size;
    // tidy-alphabetical-start
    static_assert_size!(AttrWrapper, 16);
    static_assert_size!(LazyAttrTokenStreamImpl, 144);
    // tidy-alphabetical-end
}