1 //! This module generates the AST datatypes used by rust-analyzer.
3 //! Specifically, it generates the `SyntaxKind` enum and a number of newtype
4 //! wrappers around `SyntaxNode` which implement `syntax::AstNode`.
7 collections::{BTreeSet, HashSet},
11 use proc_macro2::{Punct, Spacing};
12 use quote::{format_ident, quote};
13 use ungrammar::{rust_grammar, Grammar, Rule};
16 ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC},
17 codegen::{self, reformat, update, Mode},
21 pub fn generate_syntax(mode: Mode) -> Result<()> {
22 let grammar = rust_grammar();
23 let ast = lower(&grammar);
25 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
26 let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?;
27 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
29 let ast_tokens_file = project_root().join(codegen::AST_TOKENS);
30 let contents = generate_tokens(&ast)?;
31 update(ast_tokens_file.as_path(), &contents, mode)?;
33 let ast_nodes_file = project_root().join(codegen::AST_NODES);
34 let contents = generate_nodes(KINDS_SRC, &ast)?;
35 update(ast_nodes_file.as_path(), &contents, mode)?;
40 fn generate_tokens(grammar: &AstSrc) -> Result<String> {
41 let tokens = grammar.tokens.iter().map(|token| {
42 let name = format_ident!("{}", token);
43 let kind = format_ident!("{}", to_upper_snake_case(token));
45 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
47 pub(crate) syntax: SyntaxToken,
49 impl std::fmt::Display for #name {
50 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51 std::fmt::Display::fmt(&self.syntax, f)
54 impl AstToken for #name {
55 fn can_cast(kind: SyntaxKind) -> bool { kind == #kind }
56 fn cast(syntax: SyntaxToken) -> Option<Self> {
57 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
59 fn syntax(&self) -> &SyntaxToken { &self.syntax }
64 let pretty = reformat(quote! {
65 use crate::{SyntaxKind::{self, *}, SyntaxToken, ast::AstToken};
68 .replace("#[derive", "\n#[derive");
72 fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result<String> {
73 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
77 let name = format_ident!("{}", node.name);
78 let kind = format_ident!("{}", to_upper_snake_case(&node.name));
79 let traits = node.traits.iter().map(|trait_name| {
80 let trait_name = format_ident!("{}", trait_name);
81 quote!(impl ast::#trait_name for #name {})
84 let methods = node.fields.iter().map(|field| {
85 let method_name = field.method_name();
90 pub fn #method_name(&self) -> AstChildren<#ty> {
91 support::children(&self.syntax)
95 if let Some(token_kind) = field.token_kind() {
97 pub fn #method_name(&self) -> Option<#ty> {
98 support::token(&self.syntax, #token_kind)
103 pub fn #method_name(&self) -> Option<#ty> {
104 support::child(&self.syntax)
112 #[pretty_doc_comment_placeholder_workaround]
113 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
115 pub(crate) syntax: SyntaxNode,
125 impl AstNode for #name {
126 fn can_cast(kind: SyntaxKind) -> bool {
129 fn cast(syntax: SyntaxNode) -> Option<Self> {
130 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
132 fn syntax(&self) -> &SyntaxNode { &self.syntax }
139 let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
143 let variants: Vec<_> = en.variants.iter().map(|var| format_ident!("{}", var)).collect();
144 let name = format_ident!("{}", en.name);
145 let kinds: Vec<_> = variants
147 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
149 let traits = en.traits.iter().map(|trait_name| {
150 let trait_name = format_ident!("{}", trait_name);
151 quote!(impl ast::#trait_name for #name {})
154 let ast_node = if en.name == "Stmt" {
158 impl AstNode for #name {
159 fn can_cast(kind: SyntaxKind) -> bool {
165 fn cast(syntax: SyntaxNode) -> Option<Self> {
166 let res = match syntax.kind() {
168 #kinds => #name::#variants(#variants { syntax }),
174 fn syntax(&self) -> &SyntaxNode {
177 #name::#variants(it) => &it.syntax,
187 #[pretty_doc_comment_placeholder_workaround]
188 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
190 #(#variants(#variants),)*
197 impl From<#variants> for #name {
198 fn from(node: #variants) -> #name {
199 #name::#variants(node)
209 let enum_names = grammar.enums.iter().map(|it| &it.name);
210 let node_names = grammar.nodes.iter().map(|it| &it.name);
213 enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
215 impl std::fmt::Display for #name {
216 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
217 std::fmt::Display::fmt(self.syntax(), f)
223 let defined_nodes: HashSet<_> = node_names.collect();
228 .map(|kind| to_pascal_case(kind))
229 .filter(|name| !defined_nodes.iter().any(|&it| it == name))
232 // TODO: restore this
233 // eprintln!("Warning: node {} not defined in ast source", node);
238 SyntaxNode, SyntaxToken, SyntaxKind::{self, *},
239 ast::{self, AstNode, AstChildren, support},
245 #(#node_boilerplate_impls)*
246 #(#enum_boilerplate_impls)*
250 let ast = ast.to_string().replace("T ! [ ", "T![").replace(" ] )", "])");
252 let mut res = String::with_capacity(ast.len() * 2);
255 grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc));
257 for chunk in ast.split("# [ pretty_doc_comment_placeholder_workaround ]") {
259 if let Some(doc) = docs.next() {
260 write_doc_comment(&doc, &mut res);
264 let pretty = reformat(res)?;
268 fn write_doc_comment(contents: &[String], dest: &mut String) {
269 for line in contents {
270 writeln!(dest, "///{}", line).unwrap();
274 fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result<String> {
275 let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
278 .filter(|(token, _name)| token.len() == 1)
279 .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
282 let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
283 if "{}[]()".contains(token) {
284 let c = token.chars().next().unwrap();
287 let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
292 grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
294 let full_keywords_values = &grammar.keywords;
296 full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw)));
298 let all_keywords_values =
299 grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::<Vec<_>>();
300 let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw));
301 let all_keywords = all_keywords_values
303 .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name)))
304 .collect::<Vec<_>>();
307 grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
309 let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
311 let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
314 #![allow(bad_style, missing_docs, unreachable_pub)]
315 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.
316 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
318 pub enum SyntaxKind {
319 // Technical SyntaxKinds: they appear temporarily during parsing,
320 // but never end up in the final tree
331 // Technical kind so that we can cast from u16 safely
335 use self::SyntaxKind::*;
338 pub fn is_keyword(self) -> bool {
340 #(#all_keywords)|* => true,
345 pub fn is_punct(self) -> bool {
347 #(#punctuation)|* => true,
352 pub fn is_literal(self) -> bool {
354 #(#literals)|* => true,
359 pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
360 let kw = match ident {
361 #(#full_keywords_values => #full_keywords,)*
367 pub fn from_char(c: char) -> Option<SyntaxKind> {
369 #(#single_byte_tokens_values => #single_byte_tokens,)*
378 #([#punctuation_values] => { $crate::SyntaxKind::#punctuation };)*
379 #([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)*
380 [lifetime] => { $crate::SyntaxKind::LIFETIME };
381 [ident] => { $crate::SyntaxKind::IDENT };
382 [shebang] => { $crate::SyntaxKind::SHEBANG };
389 fn to_upper_snake_case(s: &str) -> String {
390 let mut buf = String::with_capacity(s.len());
391 let mut prev = false;
393 if c.is_ascii_uppercase() && prev {
398 buf.push(c.to_ascii_uppercase());
403 fn to_lower_snake_case(s: &str) -> String {
404 let mut buf = String::with_capacity(s.len());
405 let mut prev = false;
407 if c.is_ascii_uppercase() && prev {
412 buf.push(c.to_ascii_lowercase());
417 fn to_pascal_case(s: &str) -> String {
418 let mut buf = String::with_capacity(s.len());
419 let mut prev_is_underscore = true;
422 prev_is_underscore = true;
423 } else if prev_is_underscore {
424 buf.push(c.to_ascii_uppercase());
425 prev_is_underscore = false;
427 buf.push(c.to_ascii_lowercase());
433 fn pluralize(s: &str) -> String {
438 fn is_many(&self) -> bool {
439 matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
441 fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
443 Field::Token(token) => {
444 let token: proc_macro2::TokenStream = token.parse().unwrap();
445 Some(quote! { T![#token] })
450 fn method_name(&self) -> proc_macro2::Ident {
452 Field::Token(name) => {
453 let name = match name.as_str() {
455 "->" => "thin_arrow",
471 "..." => "dotdotdot",
476 "::" => "coloncolon",
478 "?" => "question_mark",
482 format_ident!("{}_token", name)
484 Field::Node { name, .. } => {
488 format_ident!("{}", name)
493 fn ty(&self) -> proc_macro2::Ident {
495 Field::Token(_) => format_ident!("SyntaxToken"),
496 Field::Node { ty, .. } => format_ident!("{}", ty),
501 fn lower(grammar: &Grammar) -> AstSrc {
502 let mut res = AstSrc::default();
503 res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()];
505 let nodes = grammar.iter().collect::<Vec<_>>();
507 for &node in &nodes {
508 let name = grammar[node].name.clone();
509 let rule = &grammar[node].rule;
510 match lower_enum(grammar, rule) {
512 let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
513 res.enums.push(enum_src);
516 let mut fields = Vec::new();
517 lower_rule(&mut fields, grammar, None, rule);
518 res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
523 deduplicate_fields(&mut res);
524 extract_enums(&mut res);
525 extract_struct_traits(&mut res);
526 extract_enum_traits(&mut res);
530 fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
531 let alternatives = match rule {
535 let mut variants = Vec::new();
536 for alternative in alternatives {
538 Rule::Node(it) => variants.push(grammar[*it].name.clone()),
539 Rule::Token(it) if grammar[*it].name == ";" => (),
546 fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule) {
547 if lower_comma_list(acc, grammar, label, rule) {
552 Rule::Node(node) => {
553 let ty = grammar[*node].name.clone();
554 let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty));
555 let field = Field::Node { name, ty, cardinality: Cardinality::Optional };
558 Rule::Token(token) => {
559 assert!(label.is_none());
560 let mut name = grammar[*token].name.clone();
561 if name != "int_number" && name != "string" {
562 if "[]{}()".contains(&name) {
563 name = format!("'{}'", name);
565 let field = Field::Token(name);
569 Rule::Rep(inner) => {
570 if let Rule::Node(node) = &**inner {
571 let ty = grammar[*node].name.clone();
572 let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
573 let field = Field::Node { name, ty, cardinality: Cardinality::Many };
579 Rule::Labeled { label: l, rule } => {
580 assert!(label.is_none());
581 let manually_implemented = matches!(
596 if manually_implemented {
599 lower_rule(acc, grammar, Some(l), rule);
601 Rule::Seq(rules) | Rule::Alt(rules) => {
603 lower_rule(acc, grammar, label, rule)
606 Rule::Opt(rule) => lower_rule(acc, grammar, label, rule),
612 acc: &mut Vec<Field>,
614 label: Option<&String>,
617 let rule = match rule {
621 let (node, repeat, trailing_comma) = match rule.as_slice() {
622 [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_comma)] => {
623 (node, repeat, trailing_comma)
627 let repeat = match &**repeat {
631 match repeat.as_slice() {
632 [comma, Rule::Node(n)] if comma == &**trailing_comma && n == node => (),
635 let ty = grammar[*node].name.clone();
636 let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
637 let field = Field::Node { name, ty, cardinality: Cardinality::Many };
642 fn deduplicate_fields(ast: &mut AstSrc) {
643 for node in &mut ast.nodes {
645 'outer: while i < node.fields.len() {
647 let f1 = &node.fields[i];
648 let f2 = &node.fields[j];
650 node.fields.remove(i);
659 fn extract_enums(ast: &mut AstSrc) {
660 for node in &mut ast.nodes {
661 for enm in &ast.enums {
662 let mut to_remove = Vec::new();
663 for (i, field) in node.fields.iter().enumerate() {
664 let ty = field.ty().to_string();
665 if enm.variants.iter().any(|it| it == &ty) {
669 if to_remove.len() == enm.variants.len() {
670 node.remove_field(to_remove);
671 let ty = enm.name.clone();
672 let name = to_lower_snake_case(&ty);
673 node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional });
679 fn extract_struct_traits(ast: &mut AstSrc) {
680 let traits: &[(&str, &[&str])] = &[
681 ("AttrsOwner", &["attrs"]),
682 ("NameOwner", &["name"]),
683 ("VisibilityOwner", &["visibility"]),
684 ("GenericParamsOwner", &["generic_param_list", "where_clause"]),
685 ("TypeBoundsOwner", &["type_bound_list", "colon_token"]),
686 ("ModuleItemOwner", &["items"]),
687 ("LoopBodyOwner", &["label", "loop_body"]),
688 ("ArgListOwner", &["arg_list"]),
691 for node in &mut ast.nodes {
692 for (name, methods) in traits {
693 extract_struct_trait(node, name, methods);
698 fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
699 let mut to_remove = Vec::new();
700 for (i, field) in node.fields.iter().enumerate() {
701 let method_name = field.method_name().to_string();
702 if methods.iter().any(|&it| it == &method_name) {
706 if to_remove.len() == methods.len() {
707 node.traits.push(trait_name.to_string());
708 node.remove_field(to_remove);
712 fn extract_enum_traits(ast: &mut AstSrc) {
713 for enm in &mut ast.enums {
714 if enm.name == "Stmt" {
717 let nodes = &ast.nodes;
718 let mut variant_traits = enm
721 .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
722 .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
724 let mut enum_traits = match variant_traits.next() {
728 for traits in variant_traits {
729 enum_traits = enum_traits.intersection(&traits).cloned().collect();
731 enm.traits = enum_traits.into_iter().collect();
736 fn remove_field(&mut self, to_remove: Vec<usize>) {
737 to_remove.into_iter().rev().for_each(|idx| {
738 self.fields.remove(idx);