crates/syntax/src/lib.rs

   1 //! Syntax Tree library used throughout the rust analyzer.
   2 //!
   3 //! Properties:
   4 //!   - easy and fast incremental re-parsing
   5 //!   - graceful handling of errors
   6 //!   - full-fidelity representation (*any* text can be precisely represented as
   7 //!     a syntax tree)
   8 //!
   9 //! For more information, see the [RFC]. Current implementation is inspired by
  10 //! the [Swift] one.
  11 //!
  12 //! The most interesting modules here are `syntax_node` (which defines concrete
  13 //! syntax tree) and `ast` (which defines abstract syntax tree on top of the
  14 //! CST). The actual parser lives in a separate `parser` crate, though the
  15 //! lexer lives in this crate.
  16 //!
  17 //! See `api_walkthrough` test in this file for a quick API tour!
  18 //!
  19 //! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
  20 //! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
  21
// Crate-local `eprintln!` that shadows the standard macro: every invocation
// is forwarded, token for token, to `stdx::eprintln!`.
// NOTE(review): presumably `stdx::eprintln!` layers debugging aids on top of
// the std macro -- confirm in the `stdx` crate.
// `allow(unused)` keeps the build warning-free when nothing in the crate
// happens to invoke the macro.
#[allow(unused)]
macro_rules! eprintln {
    ($($tt:tt)*) => { stdx::eprintln!($($tt)*) };
}
  26
  27 mod syntax_node;
  28 mod syntax_error;
  29 mod parsing;
  30 mod validation;
  31 mod ptr;
  32 mod token_text;
  33 #[cfg(test)]
  34 mod tests;
  35
  36 pub mod display;
  37 pub mod algo;
  38 pub mod ast;
  39 #[doc(hidden)]
  40 pub mod fuzz;
  41 pub mod utils;
  42 pub mod ted;
  43
  44 use std::{marker::PhantomData, sync::Arc};
  45
  46 use stdx::format_to;
  47 use text_edit::Indel;
  48
  49 pub use crate::{
  50     ast::{AstNode, AstToken},
  51     parsing::lexer::{lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token},
  52     ptr::{AstPtr, SyntaxNodePtr},
  53     syntax_error::SyntaxError,
  54     syntax_node::{
  55         SyntaxElement, SyntaxElementChildren, SyntaxNode, SyntaxNodeChildren, SyntaxToken,
  56         SyntaxTreeBuilder,
  57     },
  58     token_text::TokenText,
  59 };
  60 pub use parser::{SyntaxKind, T};
  61 pub use rowan::{
  62     Direction, GreenNode, NodeOrToken, SyntaxText, TextRange, TextSize, TokenAtOffset, WalkEvent,
  63 };
  64 pub use smol_str::SmolStr;
  65
/// `Parse` is the result of the parsing: a syntax tree and a collection of
/// errors.
///
/// Note that we always produce a syntax tree, even for completely invalid
/// files.
///
/// The type parameter `T` only records, at the type level, what the root of
/// the tree is treated as; no value of `T` is stored.
#[derive(Debug, PartialEq, Eq)]
pub struct Parse<T> {
    /// Green tree backing this parse; root `SyntaxNode`s are built from it.
    green: GreenNode,
    /// Errors recorded for the tree, kept behind an `Arc` so that cloning a
    /// `Parse` does not copy the list.
    errors: Arc<Vec<SyntaxError>>,
    /// Marker that ties the parse to `T` without owning a `T`.
    _ty: PhantomData<fn() -> T>,
}
  77
  78 impl<T> Clone for Parse<T> {
  79     fn clone(&self) -> Parse<T> {
  80         Parse { green: self.green.clone(), errors: self.errors.clone(), _ty: PhantomData }
  81     }
  82 }
  83
  84 impl<T> Parse<T> {
  85     fn new(green: GreenNode, errors: Vec<SyntaxError>) -> Parse<T> {
  86         Parse { green, errors: Arc::new(errors), _ty: PhantomData }
  87     }
  88
  89     pub fn syntax_node(&self) -> SyntaxNode {
  90         SyntaxNode::new_root(self.green.clone())
  91     }
  92 }
  93
  94 impl<T: AstNode> Parse<T> {
  95     pub fn to_syntax(self) -> Parse<SyntaxNode> {
  96         Parse { green: self.green, errors: self.errors, _ty: PhantomData }
  97     }
  98
  99     pub fn tree(&self) -> T {
 100         T::cast(self.syntax_node()).unwrap()
 101     }
 102
 103     pub fn errors(&self) -> &[SyntaxError] {
 104         &*self.errors
 105     }
 106
 107     pub fn ok(self) -> Result<T, Arc<Vec<SyntaxError>>> {
 108         if self.errors.is_empty() {
 109             Ok(self.tree())
 110         } else {
 111             Err(self.errors)
 112         }
 113     }
 114 }
 115
 116 impl Parse<SyntaxNode> {
 117     pub fn cast<N: AstNode>(self) -> Option<Parse<N>> {
 118         if N::cast(self.syntax_node()).is_some() {
 119             Some(Parse { green: self.green, errors: self.errors, _ty: PhantomData })
 120         } else {
 121             None
 122         }
 123     }
 124 }
 125
 126 impl Parse<SourceFile> {
 127     pub fn debug_dump(&self) -> String {
 128         let mut buf = format!("{:#?}", self.tree().syntax());
 129         for err in self.errors.iter() {
 130             format_to!(buf, "error {:?}: {}\n", err.range(), err);
 131         }
 132         buf
 133     }
 134
 135     pub fn reparse(&self, indel: &Indel) -> Parse<SourceFile> {
 136         self.incremental_reparse(indel).unwrap_or_else(|| self.full_reparse(indel))
 137     }
 138
 139     fn incremental_reparse(&self, indel: &Indel) -> Option<Parse<SourceFile>> {
 140         // FIXME: validation errors are not handled here
 141         parsing::incremental_reparse(self.tree().syntax(), indel, self.errors.to_vec()).map(
 142             |(green_node, errors, _reparsed_range)| Parse {
 143                 green: green_node,
 144                 errors: Arc::new(errors),
 145                 _ty: PhantomData,
 146             },
 147         )
 148     }
 149
 150     fn full_reparse(&self, indel: &Indel) -> Parse<SourceFile> {
 151         let mut text = self.tree().syntax().text().to_string();
 152         indel.apply(&mut text);
 153         SourceFile::parse(&text)
 154     }
 155 }
 156
 157 /// `SourceFile` represents a parse tree for a single Rust file.
 158 pub use crate::ast::SourceFile;
 159
 160 impl SourceFile {
 161     pub fn parse(text: &str) -> Parse<SourceFile> {
 162         let (green, mut errors) = parsing::parse_text(text);
 163         let root = SyntaxNode::new_root(green.clone());
 164
 165         if cfg!(debug_assertions) {
 166             validation::validate_block_structure(&root);
 167         }
 168
 169         errors.extend(validation::validate(&root));
 170
 171         assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
 172         Parse { green, errors: Arc::new(errors), _ty: PhantomData }
 173     }
 174 }
 175
 176 // FIXME: `parse` functions shouldn't hang directly from AST nodes, and they
 177 // shouldn't return `Result`.
 178 //
 179 // We need a dedicated module for parser entry points, and they should always
 180 // return `Parse`.
 181
 182 impl ast::Path {
 183     /// Returns `text`, parsed as a path, but only if it has no errors.
 184     pub fn parse(text: &str) -> Result<Self, ()> {
 185         parsing::parse_text_as(text, parser::ParserEntryPoint::Path)
 186     }
 187 }
 188
 189 impl ast::Pat {
 190     /// Returns `text`, parsed as a pattern, but only if it has no errors.
 191     pub fn parse(text: &str) -> Result<Self, ()> {
 192         parsing::parse_text_as(text, parser::ParserEntryPoint::Pattern)
 193     }
 194 }
 195
 196 impl ast::Expr {
 197     /// Returns `text`, parsed as an expression, but only if it has no errors.
 198     pub fn parse(text: &str) -> Result<Self, ()> {
 199         parsing::parse_text_as(text, parser::ParserEntryPoint::Expr)
 200     }
 201 }
 202
 203 impl ast::Item {
 204     /// Returns `text`, parsed as an item, but only if it has no errors.
 205     pub fn parse(text: &str) -> Result<Self, ()> {
 206         parsing::parse_text_as(text, parser::ParserEntryPoint::Item)
 207     }
 208 }
 209
 210 impl ast::Type {
 211     /// Returns `text`, parsed as an type reference, but only if it has no errors.
 212     pub fn parse(text: &str) -> Result<Self, ()> {
 213         parsing::parse_text_as(text, parser::ParserEntryPoint::Type)
 214     }
 215 }
 216
 217 impl ast::Attr {
 218     /// Returns `text`, parsed as an attribute, but only if it has no errors.
 219     pub fn parse(text: &str) -> Result<Self, ()> {
 220         parsing::parse_text_as(text, parser::ParserEntryPoint::Attr)
 221     }
 222 }
 223
 224 impl ast::Stmt {
 225     /// Returns `text`, parsed as statement, but only if it has no errors.
 226     pub fn parse(text: &str) -> Result<Self, ()> {
 227         parsing::parse_text_as(text, parser::ParserEntryPoint::StatementOptionalSemi)
 228     }
 229 }
 230
/// Matches a `SyntaxNode` against an `ast` type.
///
/// The `ast::Kind(it) => expr,` arms are tried in the order written: each one
/// calls `ast::Kind::cast` on a clone of the node, and the first successful
/// cast binds `it` and evaluates that arm. The trailing `_ => expr` arm is
/// mandatory and is evaluated when no cast succeeds. Every typed arm must be
/// followed by a comma; the comma after the catch-all arm is optional.
///
/// The expansion spells the types as `ast::Kind`, so `ast` must be nameable
/// at the call site.
///
/// # Example:
///
/// ```ignore
/// match_ast! {
///     match node {
///         ast::CallExpr(it) => { ... },
///         ast::MethodCallExpr(it) => { ... },
///         ast::MacroCall(it) => { ... },
///         _ => None,
///     }
/// }
/// ```
#[macro_export]
macro_rules! match_ast {
    // Bare-identifier scrutinee: wrap it in parentheses and defer to the rule
    // below.
    (match $node:ident { $($tt:tt)* }) => { match_ast!(match ($node) { $($tt)* }) };

    // Parenthesized-expression scrutinee: expands to an
    // `if let ... else if let ... else { catch_all }` chain. `$node` is
    // expanded, and cloned, once for every arm that gets tried.
    (match ($node:expr) {
        $( ast::$ast:ident($it:ident) => $res:expr, )*
        _ => $catch_all:expr $(,)?
    }) => {{
        $( if let Some($it) = ast::$ast::cast($node.clone()) { $res } else )*
        { $catch_all }
    }};
}
 257
/// This test does not check any tricky behavior; its assertions exist only to
/// illustrate the crate's API on a tiny example.
#[test]
fn api_walkthrough() {
    use ast::{ModuleItemOwner, NameOwner};

    let source_code = "
        fn foo() {
            1 + 1
        }
    ";
    // `SourceFile` is the main entry point.
    //
    // The `parse` method returns a `Parse` -- a pair of a syntax tree and a list
    // of errors. That is, a syntax tree is constructed even in the presence of
    // errors.
    let parse = SourceFile::parse(source_code);
    assert!(parse.errors().is_empty());

    // The `tree` method returns an owned syntax node of type `SourceFile`.
    // Owned nodes are cheap: inside, they are `Rc` handles to the underlying data.
    let file: SourceFile = parse.tree();

    // `SourceFile` is the root of the syntax tree. We can iterate file's items.
    // Let's fetch the `foo` function.
    let mut func = None;
    for item in file.items() {
        match item {
            ast::Item::Fn(f) => func = Some(f),
            _ => unreachable!(),
        }
    }
    let func: ast::Fn = func.unwrap();

    // Each AST node has a bunch of getters for children. All getters return
    // `Option`s though, to account for incomplete code. Some getters are common
    // for several kinds of node. In this case, a trait like `ast::NameOwner`
    // usually exists. By convention, all ast types should be used with `ast::`
    // qualifier.
    let name: Option<ast::Name> = func.name();
    let name = name.unwrap();
    assert_eq!(name.text(), "foo");

    // Let's get the `1 + 1` expression!
    let body: ast::BlockExpr = func.body().unwrap();
    let expr: ast::Expr = body.tail_expr().unwrap();

    // Enums are used to group related ast nodes together, and can be used for
    // matching. However, because there are no public fields, it's possible to
    // match only the top level enum: that is the price we pay for increased API
    // flexibility.
    let bin_expr: &ast::BinExpr = match &expr {
        ast::Expr::BinExpr(e) => e,
        _ => unreachable!(),
    };

    // Besides the "typed" AST API, there's an untyped CST one as well.
    // To switch from AST to CST, call `.syntax()` method:
    let expr_syntax: &SyntaxNode = expr.syntax();

    // Note how `expr` and `bin_expr` are in fact the same node underneath:
    assert!(expr_syntax == bin_expr.syntax());

    // To go from CST to AST, `AstNode::cast` function is used:
    let _expr: ast::Expr = match ast::Expr::cast(expr_syntax.clone()) {
        Some(e) => e,
        None => unreachable!(),
    };

    // Each syntax node has two properties. The first is a `SyntaxKind`:
    assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);

    // And the second is a text range:
    assert_eq!(expr_syntax.text_range(), TextRange::new(32.into(), 37.into()));

    // You can get node's text as a `SyntaxText` object, which will traverse the
    // tree collecting token's text:
    let text: SyntaxText = expr_syntax.text();
    assert_eq!(text.to_string(), "1 + 1");

    // There's a bunch of traversal methods on `SyntaxNode`:
    assert_eq!(expr_syntax.parent().as_ref(), Some(body.syntax()));
    assert_eq!(body.syntax().first_child_or_token().map(|it| it.kind()), Some(T!['{']));
    assert_eq!(
        expr_syntax.next_sibling_or_token().map(|it| it.kind()),
        Some(SyntaxKind::WHITESPACE)
    );

    // As well as some iterator helpers:
    let f = expr_syntax.ancestors().find_map(ast::Fn::cast);
    assert_eq!(f, Some(func));
    assert!(expr_syntax.siblings_with_tokens(Direction::Next).any(|it| it.kind() == T!['}']));
    assert_eq!(
        expr_syntax.descendants_with_tokens().count(),
        8, // 5 tokens `1`, ` `, `+`, ` `, `1`
           // 2 child literal expressions: `1`, `1`
           // 1 the node itself: `1 + 1`
    );

    // There's also a `preorder` method with a more fine-grained iteration control:
    let mut buf = String::new();
    let mut indent = 0;
    for event in expr_syntax.preorder_with_tokens() {
        match event {
            WalkEvent::Enter(node) => {
                let text = match &node {
                    NodeOrToken::Node(it) => it.text().to_string(),
                    NodeOrToken::Token(it) => it.text().to_string(),
                };
                format_to!(buf, "{:indent$}{:?} {:?}\n", " ", text, node.kind(), indent = indent);
                indent += 2;
            }
            WalkEvent::Leave(_) => indent -= 2,
        }
    }
    // Every `Enter` event was balanced by a `Leave` event.
    assert_eq!(indent, 0);
    assert_eq!(
        buf.trim(),
        r#"
"1 + 1" BIN_EXPR
  "1" LITERAL
    "1" INT_NUMBER
  " " WHITESPACE
  "+" PLUS
  " " WHITESPACE
  "1" LITERAL
    "1" INT_NUMBER
"#
        .trim()
    );

    // To recursively process the tree, there are three approaches:
    // 1. explicitly call getter methods on AST nodes.
    // 2. use descendants and `AstNode::cast`.
    // 3. use descendants and `match_ast!`.
    //
    // Here's what the second one looks like:
    let exprs_cast: Vec<String> = file
        .syntax()
        .descendants()
        .filter_map(ast::Expr::cast)
        .map(|expr| expr.syntax().text().to_string())
        .collect();

    // And here's the third one, using the `match_ast!` macro.
    let mut exprs_visit = Vec::new();
    for node in file.syntax().descendants() {
        match_ast! {
            match node {
                ast::Expr(it) => {
                    let res = it.syntax().text().to_string();
                    exprs_visit.push(res);
                },
                _ => (),
            }
        }
    }
    // Both approaches visit the same expressions in the same order.
    assert_eq!(exprs_cast, exprs_visit);
}
 414     assert_eq!(exprs_cast, exprs_visit);
 415 }