1 //! The main parser interface.
3 #![feature(bool_to_option)]
4 #![feature(crate_visibility_modifier)]
5 #![feature(bindings_after_at)]
6 #![feature(or_patterns)]
9 use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
10 use rustc_ast::tokenstream::{self, Spacing, TokenStream, TokenTree};
11 use rustc_ast_pretty::pprust;
12 use rustc_data_structures::sync::Lrc;
13 use rustc_errors::{Diagnostic, FatalError, Level, PResult};
14 use rustc_session::parse::ParseSess;
15 use rustc_span::{symbol::kw, FileName, SourceFile, Span, DUMMY_SP};
17 use smallvec::SmallVec;
22 use tracing::{debug, info};
24 pub const MACRO_ARGUMENTS: Option<&'static str> = Some("macro arguments");
28 use parser::{emit_unclosed_delims, make_unclosed_delims_error, Parser};
30 pub mod validate_attr;
32 // A bunch of utility functions of the form `parse_<thing>_from_<source>`
33 // where <thing> includes crate, expr, item, stmt, tts, and one that
34 // uses a HOF to parse anything, and <source> includes file and
37 /// A variant of `panictry!` that works on a `Vec<Diagnostic>` instead of one `DiagnosticBuilder`.
38 macro_rules! panictry_buffer {
39 ($handler:expr, $e:expr) => {{
40 use rustc_errors::FatalError;
41 use std::result::Result::{Err, Ok};
46 $handler.emit_diagnostic(&e);
54 pub fn parse_crate_from_file<'a>(input: &Path, sess: &'a ParseSess) -> PResult<'a, ast::Crate> {
55 let mut parser = new_parser_from_file(sess, input, None);
56 parser.parse_crate_mod()
59 pub fn parse_crate_attrs_from_file<'a>(
62 ) -> PResult<'a, Vec<ast::Attribute>> {
63 let mut parser = new_parser_from_file(sess, input, None);
64 parser.parse_inner_attributes()
67 pub fn parse_crate_from_source_str(
71 ) -> PResult<'_, ast::Crate> {
72 new_parser_from_source_str(sess, name, source).parse_crate_mod()
75 pub fn parse_crate_attrs_from_source_str(
79 ) -> PResult<'_, Vec<ast::Attribute>> {
80 new_parser_from_source_str(sess, name, source).parse_inner_attributes()
83 pub fn parse_stream_from_source_str(
87 override_span: Option<Span>,
89 let (stream, mut errors) =
90 source_file_to_stream(sess, sess.source_map().new_source_file(name, source), override_span);
91 emit_unclosed_delims(&mut errors, &sess);
95 /// Creates a new parser from a source string.
96 pub fn new_parser_from_source_str(sess: &ParseSess, name: FileName, source: String) -> Parser<'_> {
97 panictry_buffer!(&sess.span_diagnostic, maybe_new_parser_from_source_str(sess, name, source))
100 /// Creates a new parser from a source string. Returns any buffered errors from lexing the initial
102 pub fn maybe_new_parser_from_source_str(
106 ) -> Result<Parser<'_>, Vec<Diagnostic>> {
107 maybe_source_file_to_parser(sess, sess.source_map().new_source_file(name, source))
110 /// Creates a new parser, handling errors as appropriate if the file doesn't exist.
111 /// If a span is given, that is used on an error as the source of the problem.
112 pub fn new_parser_from_file<'a>(sess: &'a ParseSess, path: &Path, sp: Option<Span>) -> Parser<'a> {
113 source_file_to_parser(sess, file_to_source_file(sess, path, sp))
116 /// Creates a new parser, returning buffered diagnostics if the file doesn't exist,
117 /// or from lexing the initial token stream.
118 pub fn maybe_new_parser_from_file<'a>(
121 ) -> Result<Parser<'a>, Vec<Diagnostic>> {
122 let file = try_file_to_source_file(sess, path, None).map_err(|db| vec![db])?;
123 maybe_source_file_to_parser(sess, file)
126 /// Given a `source_file` and config, returns a parser.
127 fn source_file_to_parser(sess: &ParseSess, source_file: Lrc<SourceFile>) -> Parser<'_> {
128 panictry_buffer!(&sess.span_diagnostic, maybe_source_file_to_parser(sess, source_file))
131 /// Given a `source_file` and config, returns a parser. Returns any buffered errors from lexing the
132 /// initial token stream.
133 fn maybe_source_file_to_parser(
135 source_file: Lrc<SourceFile>,
136 ) -> Result<Parser<'_>, Vec<Diagnostic>> {
137 let end_pos = source_file.end_pos;
138 let (stream, unclosed_delims) = maybe_file_to_stream(sess, source_file, None)?;
139 let mut parser = stream_to_parser(sess, stream, None);
140 parser.unclosed_delims = unclosed_delims;
141 if parser.token == token::Eof {
142 parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt());
148 // Must preserve old name for now, because `quote!` from the *existing*
149 // compiler expands into it.
150 pub fn new_parser_from_tts(sess: &ParseSess, tts: Vec<TokenTree>) -> Parser<'_> {
151 stream_to_parser(sess, tts.into_iter().collect(), crate::MACRO_ARGUMENTS)
156 /// Given a session and a path and an optional span (for error reporting),
157 /// adds the path to the session's `source_map` and returns the new `source_file`,
158 /// or an error diagnostic when the file can't be read.
159 fn try_file_to_source_file(
162 spanopt: Option<Span>,
163 ) -> Result<Lrc<SourceFile>, Diagnostic> {
164 sess.source_map().load_file(path).map_err(|e| {
165 let msg = format!("couldn't read {}: {}", path.display(), e);
166 let mut diag = Diagnostic::new(Level::Fatal, &msg);
167 if let Some(sp) = spanopt {
174 /// Given a session and a path and an optional span (for error reporting),
175 /// adds the path to the session's `source_map` and returns the new `source_file`.
176 fn file_to_source_file(sess: &ParseSess, path: &Path, spanopt: Option<Span>) -> Lrc<SourceFile> {
177 match try_file_to_source_file(sess, path, spanopt) {
178 Ok(source_file) => source_file,
180 sess.span_diagnostic.emit_diagnostic(&d);
186 /// Given a `source_file`, produces a sequence of token trees.
187 pub fn source_file_to_stream(
189 source_file: Lrc<SourceFile>,
190 override_span: Option<Span>,
191 ) -> (TokenStream, Vec<lexer::UnmatchedBrace>) {
192 panictry_buffer!(&sess.span_diagnostic, maybe_file_to_stream(sess, source_file, override_span))
195 /// Given a source file, produces a sequence of token trees. Returns any buffered errors from
196 /// parsing the token stream.
197 pub fn maybe_file_to_stream(
199 source_file: Lrc<SourceFile>,
200 override_span: Option<Span>,
201 ) -> Result<(TokenStream, Vec<lexer::UnmatchedBrace>), Vec<Diagnostic>> {
202 let src = source_file.src.as_ref().unwrap_or_else(|| {
204 .bug(&format!("cannot lex `source_file` without source: {}", source_file.name));
207 let (token_trees, unmatched_braces) =
208 lexer::parse_token_trees(sess, src.as_str(), source_file.start_pos, override_span);
211 Ok(stream) => Ok((stream, unmatched_braces)),
213 let mut buffer = Vec::with_capacity(1);
214 err.buffer(&mut buffer);
215 // Not using `emit_unclosed_delims` to use `db.buffer`
216 for unmatched in unmatched_braces {
217 if let Some(err) = make_unclosed_delims_error(unmatched, &sess) {
218 err.buffer(&mut buffer);
226 /// Given a stream and the `ParseSess`, produces a parser.
227 pub fn stream_to_parser<'a>(
230 subparser_name: Option<&'static str>,
232 Parser::new(sess, stream, false, subparser_name)
235 /// Runs the given subparser `f` on the tokens of the given `attr`'s item.
236 pub fn parse_in<'a, T>(
240 mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
241 ) -> PResult<'a, T> {
242 let mut parser = Parser::new(sess, tts, false, Some(name));
243 let result = f(&mut parser)?;
244 if parser.token != token::Eof {
245 parser.unexpected()?;
250 // NOTE(Centril): The following probably shouldn't be here but it acknowledges the
251 // fact that architecturally, we are using parsing (read on below to understand why).
253 pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
254 // A `Nonterminal` is often a parsed AST item. At this point we now
255 // need to convert the parsed AST to an actual token stream, i.e.
256 // un-parse it basically.
258 // Unfortunately there's not really a great way to do that in a
259 // guaranteed lossless fashion right now. The fallback here is to just
260 // stringify the AST node and reparse it, but this loses all span
263 // As a result, some AST nodes are annotated with the token stream they
264 // came from. Here we attempt to extract these lossless token streams
265 // before we fall back to the stringification.
266 let tokens = match *nt {
267 Nonterminal::NtItem(ref item) => {
268 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
270 Nonterminal::NtBlock(ref block) => block.tokens.clone(),
271 Nonterminal::NtStmt(ref stmt) => {
272 // FIXME: We currently only collect tokens for `:stmt`
273 // matchers in `macro_rules!` macros. When we start collecting
274 // tokens for attributes on statements, we will need to prepend
278 Nonterminal::NtPat(ref pat) => pat.tokens.clone(),
279 Nonterminal::NtTy(ref ty) => ty.tokens.clone(),
280 Nonterminal::NtIdent(ident, is_raw) => {
281 Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
283 Nonterminal::NtLifetime(ident) => {
284 Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
286 Nonterminal::NtMeta(ref attr) => attr.tokens.clone(),
287 Nonterminal::NtPath(ref path) => path.tokens.clone(),
288 Nonterminal::NtVis(ref vis) => vis.tokens.clone(),
289 Nonterminal::NtTT(ref tt) => Some(tt.clone().into()),
290 Nonterminal::NtExpr(ref expr) | Nonterminal::NtLiteral(ref expr) => {
291 if expr.tokens.is_none() {
292 debug!("missing tokens for expr {:?}", expr);
294 prepend_attrs(sess, &expr.attrs, expr.tokens.as_ref(), span)
298 // FIXME(#43081): Avoid this pretty-print + reparse hack
299 let source = pprust::nonterminal_to_string(nt);
300 let filename = FileName::macro_expansion_source_code(&source);
301 let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
303 // During early phases of the compiler the AST could get modified
304 // directly (e.g., attributes added or removed) and the internal cache
305 // of tokens may not be invalidated or updated. Consequently if the
306 // "lossless" token stream disagrees with our actual stringification
307 // (which has historically been much more battle-tested) then we go
308 // with the lossy stream anyway (losing span information).
310 // Note that the comparison isn't `==` here to avoid comparing spans,
311 // but it *also* is a "probable" equality which is a pretty weird
312 // definition. We mostly want to catch actual changes to the AST
313 // like a `#[cfg]` being processed or some weird `macro_rules!`
316 // What we *don't* want to catch is the fact that a user-defined
317 // literal like `0xf` is stringified as `15`, causing the cached token
318 // stream to not be literal `==` token-wise (ignoring spans) to the
319 // token stream we got from stringification.
321 // Instead the "probably equal" check here is "does each token
322 // recursively have the same discriminant?" We basically don't look at
323 // the token values here and assume that such fine grained token stream
324 // modifications, including adding/removing typically non-semantic
325 // tokens such as extra braces and commas, don't happen.
326 if let Some(tokens) = tokens {
327 if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real, sess) {
331 "cached tokens found, but they're not \"probably equal\", \
332 going with stringified version"
334 info!("cached tokens: {:?}", tokens);
335 info!("reparsed tokens: {:?}", tokens_for_real);
340 // See comments in `nt_to_tokenstream` for why we care about
341 // *probably* equal here rather than actual equality
343 // This is otherwise the same as `eq_unspanned`, only recursing with a
345 pub fn tokenstream_probably_equal_for_proc_macro(
350 // When checking for `probably_eq`, we ignore certain tokens that aren't
351 // preserved in the AST. Because they are not preserved, the pretty
352 // printer arbitrarily adds or removes them when printing as token
353 // streams, making a comparison between a token stream generated from an
354 // AST and a token stream which was parsed into an AST more reliable.
355 fn semantic_tree(tree: &TokenTree) -> bool {
356 if let TokenTree::Token(token) = tree {
358 // The pretty printer tends to add trailing commas to
359 // everything, and in particular, after struct fields.
361 // The pretty printer emits `NoDelim` as whitespace.
362 | token::OpenDelim(DelimToken::NoDelim)
363 | token::CloseDelim(DelimToken::NoDelim)
364 // The pretty printer collapses many semicolons into one.
366 // We don't preserve leading `|` tokens in patterns, so
367 // we ignore them entirely
368 | token::BinOp(token::BinOpToken::Or)
369 // We don't preserve trailing '+' tokens in trait bounds,
370 // so we ignore them entirely
371 | token::BinOp(token::BinOpToken::Plus)
372 // The pretty printer can turn `$crate` into `::crate_name`
373 | token::ModSep = token.kind {
380 // When comparing two `TokenStream`s, we ignore the `IsJoint` information.
382 // However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
383 // use `Token.glue` on adjacent tokens with the proper `IsJoint`.
384 // Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
385 // and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
386 // when determining if two `TokenStream`s are 'probably equal'.
388 // Therefore, we use `break_two_token_op` to convert all tokens
389 // to the 'unglued' form (if it exists). This ensures that two
390 // `TokenStream`s which differ only in how their tokens are glued
391 // will be considered 'probably equal', which allows us to keep spans.
393 // This is important when the original `TokenStream` contained
394 // extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
395 // will be omitted when we pretty-print, which can cause the original
396 // and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
397 // leading to some tokens being 'glued' together in one stream but not
398 // the other. See #68489 for more details.
399 fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
400 // In almost all cases, we should have either zero or one levels
401 // of 'unglueing'. However, in some unusual cases, we may need
402 // to iterate breaking tokens multiple times. For example:
403 // '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]'
404 let mut token_trees: SmallVec<[_; 2]>;
405 if let TokenTree::Token(token) = &tree {
406 let mut out = SmallVec::<[_; 2]>::new();
407 out.push(token.clone());
408 // Iterate to fixpoint:
409 // * We start off with 'out' containing our initial token, and `temp` empty
410 // * If we are able to break any tokens in `out`, then `out` will have
411 // at least one more element than 'temp', so we will try to break tokens
413 // * If we cannot break any tokens in 'out', we are done
415 let mut temp = SmallVec::<[_; 2]>::new();
416 let mut changed = false;
418 for token in out.into_iter() {
419 if let Some((first, second)) = token.kind.break_two_token_op() {
420 temp.push(Token::new(first, DUMMY_SP));
421 temp.push(Token::new(second, DUMMY_SP));
432 token_trees = out.into_iter().map(TokenTree::Token).collect();
434 token_trees = SmallVec::new();
435 token_trees.push(tree);
437 token_trees.into_iter()
440 let expand_nt = |tree: TokenTree| {
441 if let TokenTree::Token(Token { kind: TokenKind::Interpolated(nt), span }) = &tree {
442 // When checking tokenstreams for 'probable equality', we are comparing
443 // a captured (from parsing) `TokenStream` to a reparsed tokenstream.
444 // The reparsed `TokenStream` will never have `None`-delimited groups,
445 // since they are only ever inserted as a result of macro expansion.
446 // Therefore, inserting a `None`-delimited group here (when we
447 // convert a nested `Nonterminal` to a tokenstream) would cause
448 // a mismatch with the reparsed tokenstream.
450 // Note that we currently do not handle the case where the
451 // reparsed stream has a `Parenthesis`-delimited group
452 // inserted. This will cause a spurious mismatch:
453 // issue #75734 tracks resolving this.
454 nt_to_tokenstream(nt, sess, *span).into_trees()
456 TokenStream::new(vec![(tree, Spacing::Alone)]).into_trees()
460 // Break tokens after we expand any nonterminals, so that we break tokens
461 // that are produced as a result of nonterminal expansion.
462 let mut t1 = first.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
463 let mut t2 = other.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
464 for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
465 if !tokentree_probably_equal_for_proc_macro(&t1, &t2, sess) {
469 t1.next().is_none() && t2.next().is_none()
472 // See comments in `nt_to_tokenstream` for why we care about
473 // *probably* equal here rather than actual equality
475 // This is otherwise the same as `eq_unspanned`, only recursing with a
477 pub fn tokentree_probably_equal_for_proc_macro(
482 match (first, other) {
483 (TokenTree::Token(token), TokenTree::Token(token2)) => {
484 token_probably_equal_for_proc_macro(token, token2)
486 (TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
487 delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2, sess)
493 // See comments in `nt_to_tokenstream` for why we care about
494 // *probably* equal here rather than actual equality
495 fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bool {
496 if mem::discriminant(&first.kind) != mem::discriminant(&other.kind) {
499 use rustc_ast::token::TokenKind::*;
500 match (&first.kind, &other.kind) {
515 | (&DotDotDot, &DotDotDot)
516 | (&DotDotEq, &DotDotEq)
523 | (&FatArrow, &FatArrow)
526 | (&Question, &Question)
527 | (&Eof, &Eof) => true,
529 (&BinOp(a), &BinOp(b)) | (&BinOpEq(a), &BinOpEq(b)) => a == b,
531 (&OpenDelim(a), &OpenDelim(b)) | (&CloseDelim(a), &CloseDelim(b)) => a == b,
533 (&DocComment(a1, a2, a3), &DocComment(b1, b2, b3)) => a1 == b1 && a2 == b2 && a3 == b3,
535 (&Literal(a), &Literal(b)) => a == b,
537 (&Lifetime(a), &Lifetime(b)) => a == b,
538 (&Ident(a, b), &Ident(c, d)) => {
539 b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
542 (&Interpolated(..), &Interpolated(..)) => panic!("Unexpanded Interpolated!"),
544 _ => panic!("forgot to add a token?"),
550 attrs: &[ast::Attribute],
551 tokens: Option<&tokenstream::TokenStream>,
552 span: rustc_span::Span,
553 ) -> Option<tokenstream::TokenStream> {
554 let tokens = tokens?;
555 if attrs.is_empty() {
556 return Some(tokens.clone());
558 let mut builder = tokenstream::TokenStreamBuilder::new();
562 ast::AttrStyle::Outer,
563 "inner attributes should prevent cached tokens from existing"
566 let source = pprust::attribute_to_string(attr);
567 let macro_filename = FileName::macro_expansion_source_code(&source);
569 let item = match attr.kind {
570 ast::AttrKind::Normal(ref item) => item,
571 ast::AttrKind::DocComment(..) => {
572 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
573 builder.push(stream);
578 // synthesize # [ $path $tokens ] manually here
579 let mut brackets = tokenstream::TokenStreamBuilder::new();
581 // For simple paths, push the identifier directly
582 if item.path.segments.len() == 1 && item.path.segments[0].args.is_none() {
583 let ident = item.path.segments[0].ident;
584 let token = token::Ident(ident.name, ident.as_str().starts_with("r#"));
585 brackets.push(tokenstream::TokenTree::token(token, ident.span));
587 // ... and for more complicated paths, fall back to a reparse hack that
588 // should eventually be removed.
590 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
591 brackets.push(stream);
594 brackets.push(item.args.outer_tokens());
596 // The spans we list here for `#` and for `[ ... ]` are both wrong in
597 // that they encompass more than each token, but they are hopefully "good
598 // enough" for now at least.
599 builder.push(tokenstream::TokenTree::token(token::Pound, attr.span));
600 let delim_span = tokenstream::DelimSpan::from_single(attr.span);
601 builder.push(tokenstream::TokenTree::Delimited(
603 token::DelimToken::Bracket,
607 builder.push(tokens.clone());
608 Some(builder.build())