src/librustc_expand/mbe/macro_rules.rs

   1 use crate::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander};
   2 use crate::base::{SyntaxExtension, SyntaxExtensionKind};
   3 use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind};
   4 use crate::mbe;
   5 use crate::mbe::macro_check;
   6 use crate::mbe::macro_parser::parse_tt;
   7 use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success};
   8 use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
   9 use crate::mbe::transcribe::transcribe;
  10
  11 use rustc_ast::ast;
  12 use rustc_ast::token::{self, NonterminalKind, NtTT, Token, TokenKind::*};
  13 use rustc_ast::tokenstream::{DelimSpan, TokenStream};
  14 use rustc_ast_pretty::pprust;
  15 use rustc_attr::{self as attr, TransparencyError};
  16 use rustc_data_structures::fx::FxHashMap;
  17 use rustc_data_structures::sync::Lrc;
  18 use rustc_errors::{Applicability, DiagnosticBuilder};
  19 use rustc_feature::Features;
  20 use rustc_parse::parser::Parser;
  21 use rustc_session::parse::ParseSess;
  22 use rustc_session::Session;
  23 use rustc_span::edition::Edition;
  24 use rustc_span::hygiene::Transparency;
  25 use rustc_span::symbol::{kw, sym, Ident, MacroRulesNormalizedIdent};
  26 use rustc_span::Span;
  27
  28 use std::borrow::Cow;
  29 use std::collections::hash_map::Entry;
  30 use std::{mem, slice};
  31 use tracing::debug;
  32
  33 crate struct ParserAnyMacro<'a> {
  34     parser: Parser<'a>,
  35
  36     /// Span of the expansion site of the macro this parser is for
  37     site_span: Span,
  38     /// The ident of the macro we're parsing
  39     macro_ident: Ident,
  40     arm_span: Span,
  41 }
  42
  43 crate fn annotate_err_with_kind(
  44     err: &mut DiagnosticBuilder<'_>,
  45     kind: AstFragmentKind,
  46     span: Span,
  47 ) {
  48     match kind {
  49         AstFragmentKind::Ty => {
  50             err.span_label(span, "this macro call doesn't expand to a type");
  51         }
  52         AstFragmentKind::Pat => {
  53             err.span_label(span, "this macro call doesn't expand to a pattern");
  54         }
  55         _ => {}
  56     };
  57 }
  58
  59 /// Instead of e.g. `vec![a, b, c]` in a pattern context, suggest `[a, b, c]`.
  60 fn suggest_slice_pat(e: &mut DiagnosticBuilder<'_>, site_span: Span, parser: &Parser<'_>) {
  61     let mut suggestion = None;
  62     if let Ok(code) = parser.sess.source_map().span_to_snippet(site_span) {
  63         if let Some(bang) = code.find('!') {
  64             suggestion = Some(code[bang + 1..].to_string());
  65         }
  66     }
  67     if let Some(suggestion) = suggestion {
  68         e.span_suggestion(
  69             site_span,
  70             "use a slice pattern here instead",
  71             suggestion,
  72             Applicability::MachineApplicable,
  73         );
  74     } else {
  75         e.span_label(site_span, "use a slice pattern here instead");
  76     }
  77     e.help(
  78         "for more information, see https://doc.rust-lang.org/edition-guide/\
  79         rust-2018/slice-patterns.html",
  80     );
  81 }
  82
  83 fn emit_frag_parse_err(
  84     mut e: DiagnosticBuilder<'_>,
  85     parser: &Parser<'_>,
  86     orig_parser: &mut Parser<'_>,
  87     site_span: Span,
  88     macro_ident: Ident,
  89     arm_span: Span,
  90     kind: AstFragmentKind,
  91 ) {
  92     if parser.token == token::Eof && e.message().ends_with(", found `<eof>`") {
  93         if !e.span.is_dummy() {
  94             // early end of macro arm (#52866)
  95             e.replace_span_with(parser.sess.source_map().next_point(parser.token.span));
  96         }
  97         let msg = &e.message[0];
  98         e.message[0] = (
  99             format!(
 100                 "macro expansion ends with an incomplete expression: {}",
 101                 msg.0.replace(", found `<eof>`", ""),
 102             ),
 103             msg.1,
 104         );
 105     }
 106     if e.span.is_dummy() {
 107         // Get around lack of span in error (#30128)
 108         e.replace_span_with(site_span);
 109         if !parser.sess.source_map().is_imported(arm_span) {
 110             e.span_label(arm_span, "in this macro arm");
 111         }
 112     } else if parser.sess.source_map().is_imported(parser.token.span) {
 113         e.span_label(site_span, "in this macro invocation");
 114     }
 115     match kind {
 116         AstFragmentKind::Pat if macro_ident.name == sym::vec => {
 117             suggest_slice_pat(&mut e, site_span, parser);
 118         }
 119         // Try a statement if an expression is wanted but failed and suggest adding `;` to call.
 120         AstFragmentKind::Expr => match parse_ast_fragment(orig_parser, AstFragmentKind::Stmts) {
 121             Err(mut err) => err.cancel(),
 122             Ok(_) => {
 123                 e.note(
 124                     "the macro call doesn't expand to an expression, but it can expand to a statement",
 125                 );
 126                 e.span_suggestion_verbose(
 127                     site_span.shrink_to_hi(),
 128                     "add `;` to interpret the expansion as a statement",
 129                     ";".to_string(),
 130                     Applicability::MaybeIncorrect,
 131                 );
 132             }
 133         },
 134         _ => annotate_err_with_kind(&mut e, kind, site_span),
 135     };
 136     e.emit();
 137 }
 138
 139 impl<'a> ParserAnyMacro<'a> {
 140     crate fn make(mut self: Box<ParserAnyMacro<'a>>, kind: AstFragmentKind) -> AstFragment {
 141         let ParserAnyMacro { site_span, macro_ident, ref mut parser, arm_span } = *self;
 142         let snapshot = &mut parser.clone();
 143         let fragment = match parse_ast_fragment(parser, kind) {
 144             Ok(f) => f,
 145             Err(err) => {
 146                 emit_frag_parse_err(err, parser, snapshot, site_span, macro_ident, arm_span, kind);
 147                 return kind.dummy(site_span);
 148             }
 149         };
 150
 151         // We allow semicolons at the end of expressions -- e.g., the semicolon in
 152         // `macro_rules! m { () => { panic!(); } }` isn't parsed by `.parse_expr()`,
 153         // but `m!()` is allowed in expression positions (cf. issue #34706).
 154         if kind == AstFragmentKind::Expr && parser.token == token::Semi {
 155             parser.bump();
 156         }
 157
 158         // Make sure we don't have any tokens left to parse so we don't silently drop anything.
 159         let path = ast::Path::from_ident(macro_ident.with_span_pos(site_span));
 160         ensure_complete_parse(parser, &path, kind.name(), site_span);
 161         fragment
 162     }
 163 }
 164
 165 struct MacroRulesMacroExpander {
 166     name: Ident,
 167     span: Span,
 168     transparency: Transparency,
 169     lhses: Vec<mbe::TokenTree>,
 170     rhses: Vec<mbe::TokenTree>,
 171     valid: bool,
 172 }
 173
 174 impl TTMacroExpander for MacroRulesMacroExpander {
 175     fn expand<'cx>(
 176         &self,
 177         cx: &'cx mut ExtCtxt<'_>,
 178         sp: Span,
 179         input: TokenStream,
 180     ) -> Box<dyn MacResult + 'cx> {
 181         if !self.valid {
 182             return DummyResult::any(sp);
 183         }
 184         generic_extension(
 185             cx,
 186             sp,
 187             self.span,
 188             self.name,
 189             self.transparency,
 190             input,
 191             &self.lhses,
 192             &self.rhses,
 193         )
 194     }
 195 }
 196
 197 fn macro_rules_dummy_expander<'cx>(
 198     _: &'cx mut ExtCtxt<'_>,
 199     span: Span,
 200     _: TokenStream,
 201 ) -> Box<dyn MacResult + 'cx> {
 202     DummyResult::any(span)
 203 }
 204
 205 fn trace_macros_note(cx_expansions: &mut FxHashMap<Span, Vec<String>>, sp: Span, message: String) {
 206     let sp = sp.macro_backtrace().last().map(|trace| trace.call_site).unwrap_or(sp);
 207     cx_expansions.entry(sp).or_default().push(message);
 208 }
 209
 210 /// Given `lhses` and `rhses`, this is the new macro we create
 211 fn generic_extension<'cx>(
 212     cx: &'cx mut ExtCtxt<'_>,
 213     sp: Span,
 214     def_span: Span,
 215     name: Ident,
 216     transparency: Transparency,
 217     arg: TokenStream,
 218     lhses: &[mbe::TokenTree],
 219     rhses: &[mbe::TokenTree],
 220 ) -> Box<dyn MacResult + 'cx> {
 221     let sess = &cx.sess.parse_sess;
 222
 223     if cx.trace_macros() {
 224         let msg = format!("expanding `{}! {{ {} }}`", name, pprust::tts_to_string(&arg));
 225         trace_macros_note(&mut cx.expansions, sp, msg);
 226     }
 227
 228     // Which arm's failure should we report? (the one furthest along)
 229     let mut best_failure: Option<(Token, &str)> = None;
 230
 231     // We create a base parser that can be used for the "black box" parts.
 232     // Every iteration needs a fresh copy of that parser. However, the parser
 233     // is not mutated on many of the iterations, particularly when dealing with
 234     // macros like this:
 235     //
 236     // macro_rules! foo {
 237     //     ("a") => (A);
 238     //     ("b") => (B);
 239     //     ("c") => (C);
 240     //     // ... etc. (maybe hundreds more)
 241     // }
 242     //
 243     // as seen in the `html5ever` benchmark. We use a `Cow` so that the base
 244     // parser is only cloned when necessary (upon mutation). Furthermore, we
 245     // reinitialize the `Cow` with the base parser at the start of every
 246     // iteration, so that any mutated parsers are not reused. This is all quite
 247     // hacky, but speeds up the `html5ever` benchmark significantly. (Issue
 248     // 68836 suggests a more comprehensive but more complex change to deal with
 249     // this situation.)
 250     let parser = parser_from_cx(sess, arg.clone());
 251
 252     for (i, lhs) in lhses.iter().enumerate() {
 253         // try each arm's matchers
 254         let lhs_tt = match *lhs {
 255             mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
 256             _ => cx.span_bug(sp, "malformed macro lhs"),
 257         };
 258
 259         // Take a snapshot of the state of pre-expansion gating at this point.
 260         // This is used so that if a matcher is not `Success(..)`ful,
 261         // then the spans which became gated when parsing the unsuccessful matcher
 262         // are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
 263         let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut());
 264
 265         match parse_tt(&mut Cow::Borrowed(&parser), lhs_tt) {
 266             Success(named_matches) => {
 267                 // The matcher was `Success(..)`ful.
 268                 // Merge the gated spans from parsing the matcher with the pre-existing ones.
 269                 sess.gated_spans.merge(gated_spans_snapshot);
 270
 271                 let rhs = match rhses[i] {
 272                     // ignore delimiters
 273                     mbe::TokenTree::Delimited(_, ref delimed) => delimed.tts.clone(),
 274                     _ => cx.span_bug(sp, "malformed macro rhs"),
 275                 };
 276                 let arm_span = rhses[i].span();
 277
 278                 let rhs_spans = rhs.iter().map(|t| t.span()).collect::<Vec<_>>();
 279                 // rhs has holes ( `$id` and `$(...)` that need filled)
 280                 let mut tts = match transcribe(cx, &named_matches, rhs, transparency) {
 281                     Ok(tts) => tts,
 282                     Err(mut err) => {
 283                         err.emit();
 284                         return DummyResult::any(arm_span);
 285                     }
 286                 };
 287
 288                 // Replace all the tokens for the corresponding positions in the macro, to maintain
 289                 // proper positions in error reporting, while maintaining the macro_backtrace.
 290                 if rhs_spans.len() == tts.len() {
 291                     tts = tts.map_enumerated(|i, mut tt| {
 292                         let mut sp = rhs_spans[i];
 293                         sp = sp.with_ctxt(tt.span().ctxt());
 294                         tt.set_span(sp);
 295                         tt
 296                     });
 297                 }
 298
 299                 if cx.trace_macros() {
 300                     let msg = format!("to `{}`", pprust::tts_to_string(&tts));
 301                     trace_macros_note(&mut cx.expansions, sp, msg);
 302                 }
 303
 304                 let mut p = Parser::new(sess, tts, false, None);
 305                 p.last_type_ascription = cx.current_expansion.prior_type_ascription;
 306
 307                 // Let the context choose how to interpret the result.
 308                 // Weird, but useful for X-macros.
 309                 return Box::new(ParserAnyMacro {
 310                     parser: p,
 311
 312                     // Pass along the original expansion site and the name of the macro
 313                     // so we can print a useful error message if the parse of the expanded
 314                     // macro leaves unparsed tokens.
 315                     site_span: sp,
 316                     macro_ident: name,
 317                     arm_span,
 318                 });
 319             }
 320             Failure(token, msg) => match best_failure {
 321                 Some((ref best_token, _)) if best_token.span.lo() >= token.span.lo() => {}
 322                 _ => best_failure = Some((token, msg)),
 323             },
 324             Error(err_sp, ref msg) => {
 325                 let span = err_sp.substitute_dummy(sp);
 326                 cx.struct_span_err(span, &msg).emit();
 327                 return DummyResult::any(span);
 328             }
 329             ErrorReported => return DummyResult::any(sp),
 330         }
 331
 332         // The matcher was not `Success(..)`ful.
 333         // Restore to the state before snapshotting and maybe try again.
 334         mem::swap(&mut gated_spans_snapshot, &mut sess.gated_spans.spans.borrow_mut());
 335     }
 336     drop(parser);
 337
 338     let (token, label) = best_failure.expect("ran no matchers");
 339     let span = token.span.substitute_dummy(sp);
 340     let mut err = cx.struct_span_err(span, &parse_failure_msg(&token));
 341     err.span_label(span, label);
 342     if !def_span.is_dummy() && !cx.source_map().is_imported(def_span) {
 343         err.span_label(cx.source_map().guess_head_span(def_span), "when calling this macro");
 344     }
 345
 346     // Check whether there's a missing comma in this macro call, like `println!("{}" a);`
 347     if let Some((arg, comma_span)) = arg.add_comma() {
 348         for lhs in lhses {
 349             // try each arm's matchers
 350             let lhs_tt = match *lhs {
 351                 mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
 352                 _ => continue,
 353             };
 354             if let Success(_) =
 355                 parse_tt(&mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())), lhs_tt)
 356             {
 357                 if comma_span.is_dummy() {
 358                     err.note("you might be missing a comma");
 359                 } else {
 360                     err.span_suggestion_short(
 361                         comma_span,
 362                         "missing comma here",
 363                         ", ".to_string(),
 364                         Applicability::MachineApplicable,
 365                     );
 366                 }
 367             }
 368         }
 369     }
 370     err.emit();
 371     cx.trace_macros_diag();
 372     DummyResult::any(sp)
 373 }
 374
 375 // Note that macro-by-example's input is also matched against a token tree:
 376 //                   $( $lhs:tt => $rhs:tt );+
 377 //
 378 // Holy self-referential!
 379
 380 /// Converts a macro item into a syntax extension.
 381 pub fn compile_declarative_macro(
 382     sess: &Session,
 383     features: &Features,
 384     def: &ast::Item,
 385     edition: Edition,
 386 ) -> SyntaxExtension {
 387     debug!("compile_declarative_macro: {:?}", def);
 388     let mk_syn_ext = |expander| {
 389         SyntaxExtension::new(
 390             sess,
 391             SyntaxExtensionKind::LegacyBang(expander),
 392             def.span,
 393             Vec::new(),
 394             edition,
 395             def.ident.name,
 396             &def.attrs,
 397         )
 398     };
 399
 400     let diag = &sess.parse_sess.span_diagnostic;
 401     let lhs_nm = Ident::new(sym::lhs, def.span);
 402     let rhs_nm = Ident::new(sym::rhs, def.span);
 403     let tt_spec = Some(NonterminalKind::TT);
 404
 405     // Parse the macro_rules! invocation
 406     let (macro_rules, body) = match &def.kind {
 407         ast::ItemKind::MacroDef(def) => (def.macro_rules, def.body.inner_tokens()),
 408         _ => unreachable!(),
 409     };
 410
 411     // The pattern that macro_rules matches.
 412     // The grammar for macro_rules! is:
 413     // $( $lhs:tt => $rhs:tt );+
 414     // ...quasiquoting this would be nice.
 415     // These spans won't matter, anyways
 416     let argument_gram = vec![
 417         mbe::TokenTree::Sequence(
 418             DelimSpan::dummy(),
 419             Lrc::new(mbe::SequenceRepetition {
 420                 tts: vec![
 421                     mbe::TokenTree::MetaVarDecl(def.span, lhs_nm, tt_spec),
 422                     mbe::TokenTree::token(token::FatArrow, def.span),
 423                     mbe::TokenTree::MetaVarDecl(def.span, rhs_nm, tt_spec),
 424                 ],
 425                 separator: Some(Token::new(
 426                     if macro_rules { token::Semi } else { token::Comma },
 427                     def.span,
 428                 )),
 429                 kleene: mbe::KleeneToken::new(mbe::KleeneOp::OneOrMore, def.span),
 430                 num_captures: 2,
 431             }),
 432         ),
 433         // to phase into semicolon-termination instead of semicolon-separation
 434         mbe::TokenTree::Sequence(
 435             DelimSpan::dummy(),
 436             Lrc::new(mbe::SequenceRepetition {
 437                 tts: vec![mbe::TokenTree::token(
 438                     if macro_rules { token::Semi } else { token::Comma },
 439                     def.span,
 440                 )],
 441                 separator: None,
 442                 kleene: mbe::KleeneToken::new(mbe::KleeneOp::ZeroOrMore, def.span),
 443                 num_captures: 0,
 444             }),
 445         ),
 446     ];
 447
 448     let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS);
 449     let argument_map = match parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) {
 450         Success(m) => m,
 451         Failure(token, msg) => {
 452             let s = parse_failure_msg(&token);
 453             let sp = token.span.substitute_dummy(def.span);
 454             sess.parse_sess.span_diagnostic.struct_span_err(sp, &s).span_label(sp, msg).emit();
 455             return mk_syn_ext(Box::new(macro_rules_dummy_expander));
 456         }
 457         Error(sp, msg) => {
 458             sess.parse_sess
 459                 .span_diagnostic
 460                 .struct_span_err(sp.substitute_dummy(def.span), &msg)
 461                 .emit();
 462             return mk_syn_ext(Box::new(macro_rules_dummy_expander));
 463         }
 464         ErrorReported => {
 465             return mk_syn_ext(Box::new(macro_rules_dummy_expander));
 466         }
 467     };
 468
 469     let mut valid = true;
 470
 471     // Extract the arguments:
 472     let lhses = match argument_map[&MacroRulesNormalizedIdent::new(lhs_nm)] {
 473         MatchedSeq(ref s) => s
 474             .iter()
 475             .map(|m| {
 476                 if let MatchedNonterminal(ref nt) = *m {
 477                     if let NtTT(ref tt) = **nt {
 478                         let tt =
 479                             mbe::quoted::parse(tt.clone().into(), true, &sess.parse_sess, def.id)
 480                                 .pop()
 481                                 .unwrap();
 482                         valid &= check_lhs_nt_follows(&sess.parse_sess, features, &def.attrs, &tt);
 483                         return tt;
 484                     }
 485                 }
 486                 sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
 487             })
 488             .collect::<Vec<mbe::TokenTree>>(),
 489         _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs"),
 490     };
 491
 492     let rhses = match argument_map[&MacroRulesNormalizedIdent::new(rhs_nm)] {
 493         MatchedSeq(ref s) => s
 494             .iter()
 495             .map(|m| {
 496                 if let MatchedNonterminal(ref nt) = *m {
 497                     if let NtTT(ref tt) = **nt {
 498                         return mbe::quoted::parse(
 499                             tt.clone().into(),
 500                             false,
 501                             &sess.parse_sess,
 502                             def.id,
 503                         )
 504                         .pop()
 505                         .unwrap();
 506                     }
 507                 }
 508                 sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
 509             })
 510             .collect::<Vec<mbe::TokenTree>>(),
 511         _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured rhs"),
 512     };
 513
 514     for rhs in &rhses {
 515         valid &= check_rhs(&sess.parse_sess, rhs);
 516     }
 517
 518     // don't abort iteration early, so that errors for multiple lhses can be reported
 519     for lhs in &lhses {
 520         valid &= check_lhs_no_empty_seq(&sess.parse_sess, slice::from_ref(lhs));
 521     }
 522
 523     valid &= macro_check::check_meta_variables(&sess.parse_sess, def.id, def.span, &lhses, &rhses);
 524
 525     let (transparency, transparency_error) = attr::find_transparency(sess, &def.attrs, macro_rules);
 526     match transparency_error {
 527         Some(TransparencyError::UnknownTransparency(value, span)) => {
 528             diag.span_err(span, &format!("unknown macro transparency: `{}`", value))
 529         }
 530         Some(TransparencyError::MultipleTransparencyAttrs(old_span, new_span)) => {
 531             diag.span_err(vec![old_span, new_span], "multiple macro transparency attributes")
 532         }
 533         None => {}
 534     }
 535
 536     mk_syn_ext(Box::new(MacroRulesMacroExpander {
 537         name: def.ident,
 538         span: def.span,
 539         transparency,
 540         lhses,
 541         rhses,
 542         valid,
 543     }))
 544 }
 545
 546 fn check_lhs_nt_follows(
 547     sess: &ParseSess,
 548     features: &Features,
 549     attrs: &[ast::Attribute],
 550     lhs: &mbe::TokenTree,
 551 ) -> bool {
 552     // lhs is going to be like TokenTree::Delimited(...), where the
 553     // entire lhs is those tts. Or, it can be a "bare sequence", not wrapped in parens.
 554     if let mbe::TokenTree::Delimited(_, ref tts) = *lhs {
 555         check_matcher(sess, features, attrs, &tts.tts)
 556     } else {
 557         let msg = "invalid macro matcher; matchers must be contained in balanced delimiters";
 558         sess.span_diagnostic.span_err(lhs.span(), msg);
 559         false
 560     }
 561     // we don't abort on errors on rejection, the driver will do that for us
 562     // after parsing/expansion. we can report every error in every macro this way.
 563 }
 564
 565 /// Checks that the lhs contains no repetition which could match an empty token
 566 /// tree, because then the matcher would hang indefinitely.
 567 fn check_lhs_no_empty_seq(sess: &ParseSess, tts: &[mbe::TokenTree]) -> bool {
 568     use mbe::TokenTree;
 569     for tt in tts {
 570         match *tt {
 571             TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => (),
 572             TokenTree::Delimited(_, ref del) => {
 573                 if !check_lhs_no_empty_seq(sess, &del.tts) {
 574                     return false;
 575                 }
 576             }
 577             TokenTree::Sequence(span, ref seq) => {
 578                 if seq.separator.is_none()
 579                     && seq.tts.iter().all(|seq_tt| match *seq_tt {
 580                         TokenTree::MetaVarDecl(_, _, Some(NonterminalKind::Vis)) => true,
 581                         TokenTree::Sequence(_, ref sub_seq) => {
 582                             sub_seq.kleene.op == mbe::KleeneOp::ZeroOrMore
 583                                 || sub_seq.kleene.op == mbe::KleeneOp::ZeroOrOne
 584                         }
 585                         _ => false,
 586                     })
 587                 {
 588                     let sp = span.entire();
 589                     sess.span_diagnostic.span_err(sp, "repetition matches empty token tree");
 590                     return false;
 591                 }
 592                 if !check_lhs_no_empty_seq(sess, &seq.tts) {
 593                     return false;
 594                 }
 595             }
 596         }
 597     }
 598
 599     true
 600 }
 601
 602 fn check_rhs(sess: &ParseSess, rhs: &mbe::TokenTree) -> bool {
 603     match *rhs {
 604         mbe::TokenTree::Delimited(..) => return true,
 605         _ => sess.span_diagnostic.span_err(rhs.span(), "macro rhs must be delimited"),
 606     }
 607     false
 608 }
 609
 610 fn check_matcher(
 611     sess: &ParseSess,
 612     features: &Features,
 613     attrs: &[ast::Attribute],
 614     matcher: &[mbe::TokenTree],
 615 ) -> bool {
 616     let first_sets = FirstSets::new(matcher);
 617     let empty_suffix = TokenSet::empty();
 618     let err = sess.span_diagnostic.err_count();
 619     check_matcher_core(sess, features, attrs, &first_sets, matcher, &empty_suffix);
 620     err == sess.span_diagnostic.err_count()
 621 }
 622
 623 // `The FirstSets` for a matcher is a mapping from subsequences in the
 624 // matcher to the FIRST set for that subsequence.
 625 //
 626 // This mapping is partially precomputed via a backwards scan over the
 627 // token trees of the matcher, which provides a mapping from each
 628 // repetition sequence to its *first* set.
 629 //
 630 // (Hypothetically, sequences should be uniquely identifiable via their
 631 // spans, though perhaps that is false, e.g., for macro-generated macros
 632 // that do not try to inject artificial span information. My plan is
 633 // to try to catch such cases ahead of time and not include them in
 634 // the precomputed mapping.)
 635 struct FirstSets {
 636     // this maps each TokenTree::Sequence `$(tt ...) SEP OP` that is uniquely identified by its
 637     // span in the original matcher to the First set for the inner sequence `tt ...`.
 638     //
 639     // If two sequences have the same span in a matcher, then map that
 640     // span to None (invalidating the mapping here and forcing the code to
 641     // use a slow path).
 642     first: FxHashMap<Span, Option<TokenSet>>,
 643 }
 644
 645 impl FirstSets {
 646     fn new(tts: &[mbe::TokenTree]) -> FirstSets {
 647         use mbe::TokenTree;
 648
 649         let mut sets = FirstSets { first: FxHashMap::default() };
 650         build_recur(&mut sets, tts);
 651         return sets;
 652
 653         // walks backward over `tts`, returning the FIRST for `tts`
 654         // and updating `sets` at the same time for all sequence
 655         // substructure we find within `tts`.
 656         fn build_recur(sets: &mut FirstSets, tts: &[TokenTree]) -> TokenSet {
 657             let mut first = TokenSet::empty();
 658             for tt in tts.iter().rev() {
 659                 match *tt {
 660                     TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => {
 661                         first.replace_with(tt.clone());
 662                     }
 663                     TokenTree::Delimited(span, ref delimited) => {
 664                         build_recur(sets, &delimited.tts[..]);
 665                         first.replace_with(delimited.open_tt(span));
 666                     }
 667                     TokenTree::Sequence(sp, ref seq_rep) => {
 668                         let subfirst = build_recur(sets, &seq_rep.tts[..]);
 669
 670                         match sets.first.entry(sp.entire()) {
 671                             Entry::Vacant(vac) => {
 672                                 vac.insert(Some(subfirst.clone()));
 673                             }
 674                             Entry::Occupied(mut occ) => {
 675                                 // if there is already an entry, then a span must have collided.
 676                                 // This should not happen with typical macro_rules macros,
 677                                 // but syntax extensions need not maintain distinct spans,
 678                                 // so distinct syntax trees can be assigned the same span.
 679                                 // In such a case, the map cannot be trusted; so mark this
 680                                 // entry as unusable.
 681                                 occ.insert(None);
 682                             }
 683                         }
 684
 685                         // If the sequence contents can be empty, then the first
 686                         // token could be the separator token itself.
 687
 688                         if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) {
 689                             first.add_one_maybe(TokenTree::Token(sep.clone()));
 690                         }
 691
 692                         // Reverse scan: Sequence comes before `first`.
 693                         if subfirst.maybe_empty
 694                             || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrMore
 695                             || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrOne
 696                         {
 697                             // If sequence is potentially empty, then
 698                             // union them (preserving first emptiness).
 699                             first.add_all(&TokenSet { maybe_empty: true, ..subfirst });
 700                         } else {
 701                             // Otherwise, sequence guaranteed
 702                             // non-empty; replace first.
 703                             first = subfirst;
 704                         }
 705                     }
 706                 }
 707             }
 708
 709             first
 710         }
 711     }
 712
 713     // walks forward over `tts` until all potential FIRST tokens are
 714     // identified.
 715     fn first(&self, tts: &[mbe::TokenTree]) -> TokenSet {
 716         use mbe::TokenTree;
 717
 718         let mut first = TokenSet::empty();
 719         for tt in tts.iter() {
 720             assert!(first.maybe_empty);
 721             match *tt {
 722                 TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => {
 723                     first.add_one(tt.clone());
 724                     return first;
 725                 }
 726                 TokenTree::Delimited(span, ref delimited) => {
 727                     first.add_one(delimited.open_tt(span));
 728                     return first;
 729                 }
 730                 TokenTree::Sequence(sp, ref seq_rep) => {
 731                     let subfirst_owned;
 732                     let subfirst = match self.first.get(&sp.entire()) {
 733                         Some(&Some(ref subfirst)) => subfirst,
 734                         Some(&None) => {
 735                             subfirst_owned = self.first(&seq_rep.tts[..]);
 736                             &subfirst_owned
 737                         }
 738                         None => {
 739                             panic!("We missed a sequence during FirstSets construction");
 740                         }
 741                     };
 742
 743                     // If the sequence contents can be empty, then the first
 744                     // token could be the separator token itself.
 745                     if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) {
 746                         first.add_one_maybe(TokenTree::Token(sep.clone()));
 747                     }
 748
 749                     assert!(first.maybe_empty);
 750                     first.add_all(subfirst);
 751                     if subfirst.maybe_empty
 752                         || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrMore
 753                         || seq_rep.kleene.op == mbe::KleeneOp::ZeroOrOne
 754                     {
 755                         // Continue scanning for more first
 756                         // tokens, but also make sure we
 757                         // restore empty-tracking state.
 758                         first.maybe_empty = true;
 759                         continue;
 760                     } else {
 761                         return first;
 762                     }
 763                 }
 764             }
 765         }
 766
 767         // we only exit the loop if `tts` was empty or if every
 768         // element of `tts` matches the empty sequence.
 769         assert!(first.maybe_empty);
 770         first
 771     }
 772 }
 773
/// A set of `mbe::TokenTree`s, which may include `TokenTree::MetaVarDecl`s
/// (for macro-by-example syntactic variables). It also carries the
/// `maybe_empty` flag; that is true if and only if the matcher can
/// match an empty token sequence.
///
/// The First set is computed on submatchers like `$($a:expr b),* $(c)* d`,
/// which has corresponding FIRST = {$a:expr, c, d}.
/// Likewise, `$($a:expr b),* $(c)+ d` has FIRST = {$a:expr, c}.
///
/// (Notably, we must allow for *-op to occur zero times.)
#[derive(Clone, Debug)]
struct TokenSet {
    /// The members of the set. Kept in a `Vec`; the `add_*` methods skip
    /// token trees that are already present.
    tokens: Vec<mbe::TokenTree>,
    /// Whether the matcher this set describes may match the empty sequence.
    maybe_empty: bool,
}
 789
 790 impl TokenSet {
 791     // Returns a set for the empty sequence.
 792     fn empty() -> Self {
 793         TokenSet { tokens: Vec::new(), maybe_empty: true }
 794     }
 795
 796     // Returns the set `{ tok }` for the single-token (and thus
 797     // non-empty) sequence [tok].
 798     fn singleton(tok: mbe::TokenTree) -> Self {
 799         TokenSet { tokens: vec![tok], maybe_empty: false }
 800     }
 801
 802     // Changes self to be the set `{ tok }`.
 803     // Since `tok` is always present, marks self as non-empty.
 804     fn replace_with(&mut self, tok: mbe::TokenTree) {
 805         self.tokens.clear();
 806         self.tokens.push(tok);
 807         self.maybe_empty = false;
 808     }
 809
 810     // Changes self to be the empty set `{}`; meant for use when
 811     // the particular token does not matter, but we want to
 812     // record that it occurs.
 813     fn replace_with_irrelevant(&mut self) {
 814         self.tokens.clear();
 815         self.maybe_empty = false;
 816     }
 817
 818     // Adds `tok` to the set for `self`, marking sequence as non-empy.
 819     fn add_one(&mut self, tok: mbe::TokenTree) {
 820         if !self.tokens.contains(&tok) {
 821             self.tokens.push(tok);
 822         }
 823         self.maybe_empty = false;
 824     }
 825
 826     // Adds `tok` to the set for `self`. (Leaves `maybe_empty` flag alone.)
 827     fn add_one_maybe(&mut self, tok: mbe::TokenTree) {
 828         if !self.tokens.contains(&tok) {
 829             self.tokens.push(tok);
 830         }
 831     }
 832
 833     // Adds all elements of `other` to this.
 834     //
 835     // (Since this is a set, we filter out duplicates.)
 836     //
 837     // If `other` is potentially empty, then preserves the previous
 838     // setting of the empty flag of `self`. If `other` is guaranteed
 839     // non-empty, then `self` is marked non-empty.
 840     fn add_all(&mut self, other: &Self) {
 841         for tok in &other.tokens {
 842             if !self.tokens.contains(tok) {
 843                 self.tokens.push(tok.clone());
 844             }
 845         }
 846         if !other.maybe_empty {
 847             self.maybe_empty = false;
 848         }
 849     }
 850 }
 851
 852 // Checks that `matcher` is internally consistent and that it
 853 // can legally be followed by a token `N`, for all `N` in `follow`.
 854 // (If `follow` is empty, then it imposes no constraint on
 855 // the `matcher`.)
 856 //
 857 // Returns the set of NT tokens that could possibly come last in
 858 // `matcher`. (If `matcher` matches the empty sequence, then
 859 // `maybe_empty` will be set to true.)
 860 //
 861 // Requires that `first_sets` is pre-computed for `matcher`;
 862 // see `FirstSets::new`.
 863 fn check_matcher_core(
 864     sess: &ParseSess,
 865     features: &Features,
 866     attrs: &[ast::Attribute],
 867     first_sets: &FirstSets,
 868     matcher: &[mbe::TokenTree],
 869     follow: &TokenSet,
 870 ) -> TokenSet {
 871     use mbe::TokenTree;
 872
 873     let mut last = TokenSet::empty();
 874
 875     // 2. For each token and suffix  [T, SUFFIX] in M:
 876     // ensure that T can be followed by SUFFIX, and if SUFFIX may be empty,
 877     // then ensure T can also be followed by any element of FOLLOW.
 878     'each_token: for i in 0..matcher.len() {
 879         let token = &matcher[i];
 880         let suffix = &matcher[i + 1..];
 881
 882         let build_suffix_first = || {
 883             let mut s = first_sets.first(suffix);
 884             if s.maybe_empty {
 885                 s.add_all(follow);
 886             }
 887             s
 888         };
 889
 890         // (we build `suffix_first` on demand below; you can tell
 891         // which cases are supposed to fall through by looking for the
 892         // initialization of this variable.)
 893         let suffix_first;
 894
 895         // First, update `last` so that it corresponds to the set
 896         // of NT tokens that might end the sequence `... token`.
 897         match *token {
 898             TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => {
 899                 if token_can_be_followed_by_any(token) {
 900                     // don't need to track tokens that work with any,
 901                     last.replace_with_irrelevant();
 902                     // ... and don't need to check tokens that can be
 903                     // followed by anything against SUFFIX.
 904                     continue 'each_token;
 905                 } else {
 906                     last.replace_with(token.clone());
 907                     suffix_first = build_suffix_first();
 908                 }
 909             }
 910             TokenTree::Delimited(span, ref d) => {
 911                 let my_suffix = TokenSet::singleton(d.close_tt(span));
 912                 check_matcher_core(sess, features, attrs, first_sets, &d.tts, &my_suffix);
 913                 // don't track non NT tokens
 914                 last.replace_with_irrelevant();
 915
 916                 // also, we don't need to check delimited sequences
 917                 // against SUFFIX
 918                 continue 'each_token;
 919             }
 920             TokenTree::Sequence(_, ref seq_rep) => {
 921                 suffix_first = build_suffix_first();
 922                 // The trick here: when we check the interior, we want
 923                 // to include the separator (if any) as a potential
 924                 // (but not guaranteed) element of FOLLOW. So in that
 925                 // case, we make a temp copy of suffix and stuff
 926                 // delimiter in there.
 927                 //
 928                 // FIXME: Should I first scan suffix_first to see if
 929                 // delimiter is already in it before I go through the
 930                 // work of cloning it? But then again, this way I may
 931                 // get a "tighter" span?
 932                 let mut new;
 933                 let my_suffix = if let Some(sep) = &seq_rep.separator {
 934                     new = suffix_first.clone();
 935                     new.add_one_maybe(TokenTree::Token(sep.clone()));
 936                     &new
 937                 } else {
 938                     &suffix_first
 939                 };
 940
 941                 // At this point, `suffix_first` is built, and
 942                 // `my_suffix` is some TokenSet that we can use
 943                 // for checking the interior of `seq_rep`.
 944                 let next =
 945                     check_matcher_core(sess, features, attrs, first_sets, &seq_rep.tts, my_suffix);
 946                 if next.maybe_empty {
 947                     last.add_all(&next);
 948                 } else {
 949                     last = next;
 950                 }
 951
 952                 // the recursive call to check_matcher_core already ran the 'each_last
 953                 // check below, so we can just keep going forward here.
 954                 continue 'each_token;
 955             }
 956         }
 957
 958         // (`suffix_first` guaranteed initialized once reaching here.)
 959
 960         // Now `last` holds the complete set of NT tokens that could
 961         // end the sequence before SUFFIX. Check that every one works with `suffix`.
 962         for token in &last.tokens {
 963             if let TokenTree::MetaVarDecl(_, name, Some(kind)) = *token {
 964                 for next_token in &suffix_first.tokens {
 965                     match is_in_follow(next_token, kind) {
 966                         IsInFollow::Yes => {}
 967                         IsInFollow::No(possible) => {
 968                             let may_be = if last.tokens.len() == 1 && suffix_first.tokens.len() == 1
 969                             {
 970                                 "is"
 971                             } else {
 972                                 "may be"
 973                             };
 974
 975                             let sp = next_token.span();
 976                             let mut err = sess.span_diagnostic.struct_span_err(
 977                                 sp,
 978                                 &format!(
 979                                     "`${name}:{frag}` {may_be} followed by `{next}`, which \
 980                                      is not allowed for `{frag}` fragments",
 981                                     name = name,
 982                                     frag = kind,
 983                                     next = quoted_tt_to_string(next_token),
 984                                     may_be = may_be
 985                                 ),
 986                             );
 987                             err.span_label(sp, format!("not allowed after `{}` fragments", kind));
 988                             let msg = "allowed there are: ";
 989                             match possible {
 990                                 &[] => {}
 991                                 &[t] => {
 992                                     err.note(&format!(
 993                                         "only {} is allowed after `{}` fragments",
 994                                         t, kind,
 995                                     ));
 996                                 }
 997                                 ts => {
 998                                     err.note(&format!(
 999                                         "{}{} or {}",
1000                                         msg,
1001                                         ts[..ts.len() - 1]
1002                                             .iter()
1003                                             .copied()
1004                                             .collect::<Vec<_>>()
1005                                             .join(", "),
1006                                         ts[ts.len() - 1],
1007                                     ));
1008                                 }
1009                             }
1010                             err.emit();
1011                         }
1012                     }
1013                 }
1014             }
1015         }
1016     }
1017     last
1018 }
1019
1020 fn token_can_be_followed_by_any(tok: &mbe::TokenTree) -> bool {
1021     if let mbe::TokenTree::MetaVarDecl(_, _, Some(kind)) = *tok {
1022         frag_can_be_followed_by_any(kind)
1023     } else {
1024         // (Non NT's can always be followed by anything in matchers.)
1025         true
1026     }
1027 }
1028
1029 /// Returns `true` if a fragment of type `frag` can be followed by any sort of
1030 /// token. We use this (among other things) as a useful approximation
1031 /// for when `frag` can be followed by a repetition like `$(...)*` or
1032 /// `$(...)+`. In general, these can be a bit tricky to reason about,
1033 /// so we adopt a conservative position that says that any fragment
1034 /// specifier which consumes at most one token tree can be followed by
1035 /// a fragment specifier (indeed, these fragments can be followed by
1036 /// ANYTHING without fear of future compatibility hazards).
1037 fn frag_can_be_followed_by_any(kind: NonterminalKind) -> bool {
1038     match kind {
1039         NonterminalKind::Item           // always terminated by `}` or `;`
1040         | NonterminalKind::Block        // exactly one token tree
1041         | NonterminalKind::Ident        // exactly one token tree
1042         | NonterminalKind::Literal      // exactly one token tree
1043         | NonterminalKind::Meta         // exactly one token tree
1044         | NonterminalKind::Lifetime     // exactly one token tree
1045         | NonterminalKind::TT => true,  // exactly one token tree
1046
1047         _ => false,
1048     }
1049 }
1050
/// Answer to "may this token follow that fragment?", as produced by
/// `is_in_follow`.
enum IsInFollow {
    /// The token is allowed after the fragment.
    Yes,
    /// The token is not allowed after the fragment. Carries human-readable
    /// descriptions of what is allowed instead, for use in diagnostics.
    No(&'static [&'static str]),
}
1055
1056 /// Returns `true` if `frag` can legally be followed by the token `tok`. For
1057 /// fragments that can consume an unbounded number of tokens, `tok`
1058 /// must be within a well-defined follow set. This is intended to
1059 /// guarantee future compatibility: for example, without this rule, if
1060 /// we expanded `expr` to include a new binary operator, we might
1061 /// break macros that were relying on that binary operator as a
1062 /// separator.
1063 // when changing this do not forget to update doc/book/macros.md!
1064 fn is_in_follow(tok: &mbe::TokenTree, kind: NonterminalKind) -> IsInFollow {
1065     use mbe::TokenTree;
1066
1067     if let TokenTree::Token(Token { kind: token::CloseDelim(_), .. }) = *tok {
1068         // closing a token tree can never be matched by any fragment;
1069         // iow, we always require that `(` and `)` match, etc.
1070         IsInFollow::Yes
1071     } else {
1072         match kind {
1073             NonterminalKind::Item => {
1074                 // since items *must* be followed by either a `;` or a `}`, we can
1075                 // accept anything after them
1076                 IsInFollow::Yes
1077             }
1078             NonterminalKind::Block => {
1079                 // anything can follow block, the braces provide an easy boundary to
1080                 // maintain
1081                 IsInFollow::Yes
1082             }
1083             NonterminalKind::Stmt | NonterminalKind::Expr => {
1084                 const TOKENS: &[&str] = &["`=>`", "`,`", "`;`"];
1085                 match tok {
1086                     TokenTree::Token(token) => match token.kind {
1087                         FatArrow | Comma | Semi => IsInFollow::Yes,
1088                         _ => IsInFollow::No(TOKENS),
1089                     },
1090                     _ => IsInFollow::No(TOKENS),
1091                 }
1092             }
1093             NonterminalKind::Pat => {
1094                 const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`|`", "`if`", "`in`"];
1095                 match tok {
1096                     TokenTree::Token(token) => match token.kind {
1097                         FatArrow | Comma | Eq | BinOp(token::Or) => IsInFollow::Yes,
1098                         Ident(name, false) if name == kw::If || name == kw::In => IsInFollow::Yes,
1099                         _ => IsInFollow::No(TOKENS),
1100                     },
1101                     _ => IsInFollow::No(TOKENS),
1102                 }
1103             }
1104             NonterminalKind::Path | NonterminalKind::Ty => {
1105                 const TOKENS: &[&str] = &[
1106                     "`{`", "`[`", "`=>`", "`,`", "`>`", "`=`", "`:`", "`;`", "`|`", "`as`",
1107                     "`where`",
1108                 ];
1109                 match tok {
1110                     TokenTree::Token(token) => match token.kind {
1111                         OpenDelim(token::DelimToken::Brace)
1112                         | OpenDelim(token::DelimToken::Bracket)
1113                         | Comma
1114                         | FatArrow
1115                         | Colon
1116                         | Eq
1117                         | Gt
1118                         | BinOp(token::Shr)
1119                         | Semi
1120                         | BinOp(token::Or) => IsInFollow::Yes,
1121                         Ident(name, false) if name == kw::As || name == kw::Where => {
1122                             IsInFollow::Yes
1123                         }
1124                         _ => IsInFollow::No(TOKENS),
1125                     },
1126                     TokenTree::MetaVarDecl(_, _, Some(NonterminalKind::Block)) => IsInFollow::Yes,
1127                     _ => IsInFollow::No(TOKENS),
1128                 }
1129             }
1130             NonterminalKind::Ident | NonterminalKind::Lifetime => {
1131                 // being a single token, idents and lifetimes are harmless
1132                 IsInFollow::Yes
1133             }
1134             NonterminalKind::Literal => {
1135                 // literals may be of a single token, or two tokens (negative numbers)
1136                 IsInFollow::Yes
1137             }
1138             NonterminalKind::Meta | NonterminalKind::TT => {
1139                 // being either a single token or a delimited sequence, tt is
1140                 // harmless
1141                 IsInFollow::Yes
1142             }
1143             NonterminalKind::Vis => {
1144                 // Explicitly disallow `priv`, on the off chance it comes back.
1145                 const TOKENS: &[&str] = &["`,`", "an ident", "a type"];
1146                 match tok {
1147                     TokenTree::Token(token) => match token.kind {
1148                         Comma => IsInFollow::Yes,
1149                         Ident(name, is_raw) if is_raw || name != kw::Priv => IsInFollow::Yes,
1150                         _ => {
1151                             if token.can_begin_type() {
1152                                 IsInFollow::Yes
1153                             } else {
1154                                 IsInFollow::No(TOKENS)
1155                             }
1156                         }
1157                     },
1158                     TokenTree::MetaVarDecl(
1159                         _,
1160                         _,
1161                         Some(NonterminalKind::Ident | NonterminalKind::Ty | NonterminalKind::Path),
1162                     ) => IsInFollow::Yes,
1163                     _ => IsInFollow::No(TOKENS),
1164                 }
1165             }
1166         }
1167     }
1168 }
1169
1170 fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
1171     match *tt {
1172         mbe::TokenTree::Token(ref token) => pprust::token_to_string(&token),
1173         mbe::TokenTree::MetaVar(_, name) => format!("${}", name),
1174         mbe::TokenTree::MetaVarDecl(_, name, Some(kind)) => format!("${}:{}", name, kind),
1175         mbe::TokenTree::MetaVarDecl(_, name, None) => format!("${}:", name),
1176         _ => panic!(
1177             "unexpected mbe::TokenTree::{{Sequence or Delimited}} \
1178              in follow set checker"
1179         ),
1180     }
1181 }
1182
/// Creates a `Parser` over the token stream `tts` for the parse session
/// `sess`, passing `true` and `rustc_parse::MACRO_ARGUMENTS` as the remaining
/// `Parser::new` arguments.
// NOTE(review): `true` presumably turns on doc-comment desugaring and
// `MACRO_ARGUMENTS` presumably names the sub-parser for diagnostics --
// confirm against the signature of `Parser::new`.
fn parser_from_cx(sess: &ParseSess, tts: TokenStream) -> Parser<'_> {
    Parser::new(sess, tts, true, rustc_parse::MACRO_ARGUMENTS)
}
1186
1187 /// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For
1188 /// other tokens, this is "unexpected token...".
1189 fn parse_failure_msg(tok: &Token) -> String {
1190     match tok.kind {
1191         token::Eof => "unexpected end of macro invocation".to_string(),
1192         _ => format!("no rules expected the token `{}`", pprust::token_to_string(tok),),
1193     }
1194 }