//! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].

use rustc_hash::FxHashMap;
use stdx::{always, non_empty_vec::NonEmptyVec};
use syntax::{
    ast::{self, make::tokens::doc_comment},
    AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, SyntaxKind,
    SyntaxKind::*,
    SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, WalkEvent, T,
};
use tt::buffer::{Cursor, TokenBuffer};

use crate::{to_parser_input::to_parser_input, tt_iter::TtIter, TokenMap};
/// Convert the syntax node to a `TokenTree` (what macro will consume).
pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) {
    let (subtree, token_map, _) = syntax_node_to_token_tree_with_modifications(
/// Convert the syntax node to a `TokenTree` (what macro will consume),
/// replacing and appending synthetic tokens for the given nodes.
pub fn syntax_node_to_token_tree_with_modifications(
    node: &SyntaxNode,
    existing_token_map: TokenMap,
    next_id: u32,
    replace: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
    append: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
) -> (tt::Subtree, TokenMap, u32) {
    let global_offset = node.text_range().start();
    let mut c = Convertor::new(node, global_offset, existing_token_map, next_id, replace, append);
    let subtree = convert_tokens(&mut c);
    c.id_alloc.map.shrink_to_fit();
    always!(c.replace.is_empty(), "replace: {:?}", c.replace);
    always!(c.append.is_empty(), "append: {:?}", c.append);
    (subtree, c.id_alloc.map, c.id_alloc.next_id)
}
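
// A rough usage sketch, assuming the `syntax` crate's `SourceFile::parse`
// entry point (exact helper names may differ between versions):
//
//     let parse = syntax::SourceFile::parse("struct S;");
//     let node = parse.syntax_node();
//     let (subtree, token_map) = syntax_node_to_token_tree(&node);
//     // `token_map` maps every allocated `tt::TokenId` back to a `TextRange`
//     // in `node`, which is what lets diagnostics in expanded code point at
//     // the original source.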
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct SyntheticTokenId(pub u32);

#[derive(Debug, Clone)]
pub struct SyntheticToken {
    pub kind: SyntaxKind,
    pub text: SmolStr,
    pub range: TextRange,
    pub id: SyntheticTokenId,
}
// The following items are what `rustc` macros can be parsed into:
// link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
// * Expr(P<ast::Expr>) -> token_tree_to_expr
// * Pat(P<ast::Pat>) -> token_tree_to_pat
// * Ty(P<ast::Ty>) -> token_tree_to_ty
// * Stmts(SmallVec<[ast::Stmt; 1]>) -> token_tree_to_stmts
// * Items(SmallVec<[P<ast::Item>; 1]>) -> token_tree_to_items
//
// * TraitItems(SmallVec<[ast::TraitItem; 1]>)
// * AssocItems(SmallVec<[ast::AssocItem; 1]>)
// * ForeignItems(SmallVec<[ast::ForeignItem; 1]>)
pub fn token_tree_to_syntax_node(
    tt: &tt::Subtree,
    entry_point: parser::TopEntryPoint,
) -> (Parse<SyntaxNode>, TokenMap) {
    let buffer = match tt {
        tt::Subtree { delimiter: None, token_trees } => {
            TokenBuffer::from_tokens(token_trees.as_slice())
        }
        _ => TokenBuffer::from_subtree(tt),
    };
    let parser_input = to_parser_input(&buffer);
    let parser_output = entry_point.parse(&parser_input);
    let mut tree_sink = TtTreeSink::new(buffer.begin());
    for event in parser_output.iter() {
        match event {
            parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => {
                tree_sink.token(kind, n_raw_tokens)
            }
            parser::Step::Enter { kind } => tree_sink.start_node(kind),
            parser::Step::Exit => tree_sink.finish_node(),
            parser::Step::Error { msg } => tree_sink.error(msg.to_string()),
        }
    }
    let (parse, range_map) = tree_sink.finish();
    (parse, range_map)
}
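
// A rough round-trip sketch (illustrative only; it assumes `parse_to_token_tree`
// below succeeded and uses the `SourceFile` entry point):
//
//     let (tt, _map) = parse_to_token_tree("struct S {}").unwrap();
//     let (parse, _range_map) =
//         token_tree_to_syntax_node(&tt, parser::TopEntryPoint::SourceFile);
//     // `parse.syntax_node()` is a source file re-built from the token tree;
//     // subtree delimiters become ordinary `{}` / `()` / `[]` tokens again.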
/// Convert a string to a `TokenTree`. Returns `None` if the input has lexer errors.
pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
    let lexed = parser::LexedStr::new(text);
    if lexed.errors().next().is_some() {
        return None;
    }

    let mut conv = RawConvertor {
        lexed,
        pos: 0,
        id_alloc: TokenIdAlloc {
            map: Default::default(),
            global_offset: TextSize::default(),
            next_id: 0,
        },
    };

    let subtree = convert_tokens(&mut conv);
    Some((subtree, conv.id_alloc.map))
}
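
// A rough usage sketch (lexing only, no syntax tree involved):
//
//     let (tt, _map) = parse_to_token_tree("1 + 1").unwrap();
//     // `tt` is a delimiter-less subtree holding the literal `1`,
//     // the punct `+`, and another literal `1`.
//     assert!(parse_to_token_tree("\"unterminated").is_none()); // lexer errors bail out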
/// Split a token tree matching `$($e:expr)SEP*` into one subtree per expression.
pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> {
    if tt.token_trees.is_empty() {
        return Vec::new();
    }

    let mut iter = TtIter::new(tt);
    let mut res = Vec::new();

    while iter.peek_n(0).is_some() {
        let expanded = iter.expect_fragment(parser::PrefixEntryPoint::Expr);

        res.push(match expanded.value {
            None => break,
            Some(tt @ tt::TokenTree::Leaf(_)) => {
                tt::Subtree { delimiter: None, token_trees: vec![tt] }
            }
            Some(tt::TokenTree::Subtree(tt)) => tt,
        });

        let mut fork = iter.clone();
        if fork.expect_char(sep).is_err() {
            break;
        }
        iter = fork;
    }

    if iter.peek_n(0).is_some() {
        res.push(tt::Subtree { delimiter: None, token_trees: iter.into_iter().cloned().collect() });
    }

    res
}
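
// A rough usage sketch, assuming the input was produced by `parse_to_token_tree`
// (illustrative only):
//
//     let (tt, _map) = parse_to_token_tree("a, b + 1, c").unwrap();
//     let exprs = parse_exprs_with_sep(&tt, ',');
//     // `exprs` now holds three subtrees: `a`, `b + 1`, and `c`.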
fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
    struct StackEntry {
        subtree: tt::Subtree,
        idx: usize,
        open_range: TextRange,
    }

    let entry = StackEntry {
        subtree: tt::Subtree { delimiter: None, ..Default::default() },
        // never used (delimiter is `None`)
        idx: !0,
        open_range: TextRange::empty(TextSize::of('.')),
    };
    let mut stack = NonEmptyVec::new(entry);

    loop {
        let StackEntry { subtree, .. } = stack.last_mut();
        let result = &mut subtree.token_trees;
        let (token, range) = match conv.bump() {
            Some(it) => it,
            None => break,
        };
        let synth_id = token.synthetic_id(&conv);

        let kind = token.kind(&conv);
        if kind == COMMENT {
            if let Some(tokens) = conv.convert_doc_comment(&token) {
                // FIXME: There has to be a better way to do this
                // Add the comment's token id to the converted doc string
                let id = conv.id_alloc().alloc(range, synth_id);
                result.extend(tokens.into_iter().map(|mut tt| {
                    if let tt::TokenTree::Subtree(sub) = &mut tt {
                        if let Some(tt::TokenTree::Leaf(tt::Leaf::Literal(lit))) =
                            sub.token_trees.get_mut(2)
        let tt = if kind.is_punct() && kind != UNDERSCORE {
            if synth_id.is_none() {
                assert_eq!(range.len(), TextSize::of('.'));
            if let Some(delim) = subtree.delimiter {
                let expected = match delim.kind {
                    tt::DelimiterKind::Parenthesis => T![')'],
                    tt::DelimiterKind::Brace => T!['}'],
                    tt::DelimiterKind::Bracket => T![']'],
                if kind == expected {
                    if let Some(entry) = stack.pop() {
                        conv.id_alloc().close_delim(entry.idx, Some(range));
                        stack.last_mut().subtree.token_trees.push(entry.subtree.into());
            let delim = match kind {
                T!['('] => Some(tt::DelimiterKind::Parenthesis),
                T!['{'] => Some(tt::DelimiterKind::Brace),
                T!['['] => Some(tt::DelimiterKind::Bracket),
            if let Some(kind) = delim {
                let mut subtree = tt::Subtree::default();
                let (id, idx) = conv.id_alloc().open_delim(range);
                subtree.delimiter = Some(tt::Delimiter { id, kind });
                stack.push(StackEntry { subtree, idx, open_range: range });
            let spacing = match conv.peek().map(|next| next.kind(&conv)) {
                        && kind != UNDERSCORE =>
                _ => tt::Spacing::Alone,
            let char = match token.to_char(&conv) {
                    panic!("Token from lexer must be single char: token = {:#?}", token);
            tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range, synth_id) })
            macro_rules! make_leaf {
                    tt::$i { id: conv.id_alloc().alloc(range, synth_id), text: token.to_text(conv) }
            let leaf: tt::Leaf = match kind {
                T![true] | T![false] => make_leaf!(Ident),
                IDENT => make_leaf!(Ident),
                UNDERSCORE => make_leaf!(Ident),
                k if k.is_keyword() => make_leaf!(Ident),
                k if k.is_literal() => make_leaf!(Literal),
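                // Lifetimes arrive from the lexer as a single LIFETIME_IDENT
                // token, but `tt` has no lifetime leaf, so e.g. `'static` is
                // split below into a joint `'` punct followed by the ident
                // `static`: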
                    let char_unit = TextSize::of('\'');
                    let r = TextRange::at(range.start(), char_unit);
                    let apostrophe = tt::Leaf::from(tt::Punct {
                        spacing: tt::Spacing::Joint,
                        id: conv.id_alloc().alloc(r, synth_id),
                    result.push(apostrophe.into());

                    let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
                    let ident = tt::Leaf::from(tt::Ident {
                        text: SmolStr::new(&token.to_text(conv)[1..]),
                        id: conv.id_alloc().alloc(r, synth_id),
                    result.push(ident.into());

    // If we get here, we've consumed all input tokens.
    // We might have more than one subtree in the stack, if the delimiters are improperly balanced.
    // Merge them so we're left with one.
    while let Some(entry) = stack.pop() {
        let parent = stack.last_mut();

        conv.id_alloc().close_delim(entry.idx, None);
        let leaf: tt::Leaf = tt::Punct {
            id: conv.id_alloc().alloc(entry.open_range, None),
            char: match entry.subtree.delimiter.unwrap().kind {
                tt::DelimiterKind::Parenthesis => '(',
                tt::DelimiterKind::Brace => '{',
                tt::DelimiterKind::Bracket => '[',
            spacing: tt::Spacing::Alone,
        parent.subtree.token_trees.push(leaf.into());
        parent.subtree.token_trees.extend(entry.subtree.token_trees);

    let subtree = stack.into_last().subtree;
    if let [tt::TokenTree::Subtree(first)] = &*subtree.token_trees {
/// Returns the textual content of a doc comment block as a quoted string.
/// That is, it strips the leading `///` (or `/**`, etc.) and the trailing `*/`,
/// and then quotes the result, which is needed to convert it to a `tt::Literal`.
fn doc_comment_text(comment: &ast::Comment) -> SmolStr {
    let prefix_len = comment.prefix().len();
    let mut text = &comment.text()[prefix_len..];

    // Remove the ending "*/" of block comments.
    if comment.kind().shape == ast::CommentShape::Block {
        text = &text[0..text.len() - 2];
    }

    // Note that `tt::Literal` expects an escaped string.
    let text = format!("\"{}\"", text.escape_debug());
    text.into()
}
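
// For example, `/// Some docs` has the `///` prefix stripped and is returned as
// the quoted string `" Some docs"`, ready to become the text of a `tt::Literal`.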
fn convert_doc_comment(token: &syntax::SyntaxToken) -> Option<Vec<tt::TokenTree>> {
    cov_mark::hit!(test_meta_doc_comments);
    let comment = ast::Comment::cast(token.clone())?;
    let doc = comment.kind().doc?;

    // Build the `doc = "..."` key-value tokens.
    let meta_tkns = vec![mk_ident("doc"), mk_punct('='), mk_doc_literal(&comment)];

    // Build the surrounding `#[...]` (or `#![...]` for inner doc comments).
    let mut token_trees = Vec::with_capacity(3);
    token_trees.push(mk_punct('#'));
    if let ast::CommentPlacement::Inner = doc {
        token_trees.push(mk_punct('!'));
    }
    token_trees.push(tt::TokenTree::from(tt::Subtree {
        delimiter: Some(tt::Delimiter {
            kind: tt::DelimiterKind::Bracket,
            id: tt::TokenId::unspecified(),
        }),
        token_trees: meta_tkns,
    }));

    return Some(token_trees);
}
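
// The resulting token stream for an outer `/// Some docs` comment is roughly
// `# [doc = " Some docs"]` (with a `!` after `#` for inner `//!` comments),
// which is how doc comments are presented to macros.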
fn mk_ident(s: &str) -> tt::TokenTree {
    tt::TokenTree::from(tt::Leaf::from(tt::Ident {
        text: s.into(),
        id: tt::TokenId::unspecified(),
    }))
}

fn mk_punct(c: char) -> tt::TokenTree {
    tt::TokenTree::from(tt::Leaf::from(tt::Punct {
        char: c,
        spacing: tt::Spacing::Alone,
        id: tt::TokenId::unspecified(),
    }))
}

fn mk_doc_literal(comment: &ast::Comment) -> tt::TokenTree {
    let lit = tt::Literal { text: doc_comment_text(comment), id: tt::TokenId::unspecified() };

    tt::TokenTree::from(tt::Leaf::from(lit))
}
struct TokenIdAlloc {
    map: TokenMap,
    global_offset: TextSize,
    next_id: u32,
}

impl TokenIdAlloc {
    fn alloc(
        &mut self,
        absolute_range: TextRange,
        synthetic_id: Option<SyntheticTokenId>,
    ) -> tt::TokenId {
        let relative_range = absolute_range - self.global_offset;
        let token_id = tt::TokenId(self.next_id);
        self.next_id += 1;
        self.map.insert(token_id, relative_range);
        if let Some(id) = synthetic_id {
            self.map.insert_synthetic(token_id, id);
        }
        token_id
    }

    fn open_delim(&mut self, open_abs_range: TextRange) -> (tt::TokenId, usize) {
        let token_id = tt::TokenId(self.next_id);
        self.next_id += 1;
        let idx = self.map.insert_delim(
            token_id,
            open_abs_range - self.global_offset,
            open_abs_range - self.global_offset,
        );
        (token_id, idx)
    }

    fn close_delim(&mut self, idx: usize, close_abs_range: Option<TextRange>) {
        match close_abs_range {
            None => {
                self.map.remove_delim(idx);
            }
            Some(close) => {
                self.map.update_close_delim(idx, close - self.global_offset);
            }
        }
    }
}
/// A raw token (straight from the lexer) convertor.
struct RawConvertor<'a> {
    lexed: parser::LexedStr<'a>,
    pos: usize,
    id_alloc: TokenIdAlloc,
}
/// A single token coming out of a convertor, identified in a convertor-specific way.
trait SrcToken<Ctx>: std::fmt::Debug {
    fn kind(&self, ctx: &Ctx) -> SyntaxKind;

    fn to_char(&self, ctx: &Ctx) -> Option<char>;

    fn to_text(&self, ctx: &Ctx) -> SmolStr;

    fn synthetic_id(&self, ctx: &Ctx) -> Option<SyntheticTokenId>;
}

/// Abstracts over the two token sources that `convert_tokens` can drive:
/// raw lexer output (`RawConvertor`) and syntax trees (`Convertor`).
trait TokenConvertor: Sized {
    type Token: SrcToken<Self>;

    fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>;

    fn bump(&mut self) -> Option<(Self::Token, TextRange)>;

    fn peek(&self) -> Option<Self::Token>;

    fn id_alloc(&mut self) -> &mut TokenIdAlloc;
}
impl<'a> SrcToken<RawConvertor<'a>> for usize {
    fn kind(&self, ctx: &RawConvertor<'a>) -> SyntaxKind {
        ctx.lexed.kind(*self)
    }

    fn to_char(&self, ctx: &RawConvertor<'a>) -> Option<char> {
        ctx.lexed.text(*self).chars().next()
    }

    fn to_text(&self, ctx: &RawConvertor<'_>) -> SmolStr {
        ctx.lexed.text(*self).into()
    }

    fn synthetic_id(&self, _ctx: &RawConvertor<'a>) -> Option<SyntheticTokenId> {
        None
    }
}

impl<'a> TokenConvertor for RawConvertor<'a> {
    type Token = usize;

    fn convert_doc_comment(&self, &token: &usize) -> Option<Vec<tt::TokenTree>> {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text))
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if self.pos == self.lexed.len() {
            return None;
        }
        let token = self.pos;
        self.pos += 1;
        let range = self.lexed.text_range(token);
        let range = TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
        Some((token, range))
    }

    fn peek(&self) -> Option<Self::Token> {
        if self.pos == self.lexed.len() {
            return None;
        }
        Some(self.pos)
    }

    fn id_alloc(&mut self) -> &mut TokenIdAlloc {
        &mut self.id_alloc
    }
}
struct Convertor {
    id_alloc: TokenIdAlloc,
    current: Option<SyntaxToken>,
    current_synthetic: Vec<SyntheticToken>,
    preorder: PreorderWithTokens,
    replace: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
    append: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
    range: TextRange,
    punct_offset: Option<(SyntaxToken, TextSize)>,
}
        global_offset: TextSize,
        existing_token_map: TokenMap,
        mut replace: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
        mut append: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
        let range = node.text_range();
        let mut preorder = node.preorder_with_tokens();
        let (first, synthetic) = Self::next_token(&mut preorder, &mut replace, &mut append);
            id_alloc: { TokenIdAlloc { map: existing_token_map, global_offset, next_id } },
            current_synthetic: synthetic,

        preorder: &mut PreorderWithTokens,
        replace: &mut FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
        append: &mut FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
    ) -> (Option<SyntaxToken>, Vec<SyntheticToken>) {
        while let Some(ev) = preorder.next() {
                WalkEvent::Enter(ele) => ele,
                WalkEvent::Leave(SyntaxElement::Node(node)) => {
                    if let Some(mut v) = append.remove(&node) {
                SyntaxElement::Token(t) => return (Some(t), Vec::new()),
                SyntaxElement::Node(node) => {
                    if let Some(mut v) = replace.remove(&node) {
                        preorder.skip_subtree();
#[derive(Debug)]
enum SynToken {
    Ordinary(SyntaxToken),
    // FIXME is this supposed to be `Punct`?
    Punch(SyntaxToken, TextSize),
    Synthetic(SyntheticToken),
}

impl SynToken {
    fn token(&self) -> Option<&SyntaxToken> {
        match self {
            SynToken::Ordinary(it) | SynToken::Punch(it, _) => Some(it),
            SynToken::Synthetic(_) => None,
        }
    }
}
impl SrcToken<Convertor> for SynToken {
    fn kind(&self, _ctx: &Convertor) -> SyntaxKind {
        match self {
            SynToken::Ordinary(token) => token.kind(),
            SynToken::Punch(token, _) => token.kind(),
            SynToken::Synthetic(token) => token.kind,
        }
    }

    fn to_char(&self, _ctx: &Convertor) -> Option<char> {
        match self {
            SynToken::Ordinary(_) => None,
            SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
            SynToken::Synthetic(token) if token.text.len() == 1 => token.text.chars().next(),
            SynToken::Synthetic(_) => None,
        }
    }

    fn to_text(&self, _ctx: &Convertor) -> SmolStr {
        match self {
            SynToken::Ordinary(token) => token.text().into(),
            SynToken::Punch(token, _) => token.text().into(),
            SynToken::Synthetic(token) => token.text.clone(),
        }
    }

    fn synthetic_id(&self, _ctx: &Convertor) -> Option<SyntheticTokenId> {
        match self {
            SynToken::Synthetic(token) => Some(token.id),
            _ => None,
        }
    }
}
impl TokenConvertor for Convertor {
    type Token = SynToken;
    fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>> {
        convert_doc_comment(token.token()?)
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if let Some((punct, offset)) = self.punct_offset.clone() {
            if usize::from(offset) + 1 < punct.text().len() {
                let offset = offset + TextSize::of('.');
                let range = punct.text_range();
                self.punct_offset = Some((punct.clone(), offset));
                let range = TextRange::at(range.start() + offset, TextSize::of('.'));
                return Some((SynToken::Punch(punct, offset), range));
            }
        }

        if let Some(synth_token) = self.current_synthetic.pop() {
            if self.current_synthetic.is_empty() {
                let (new_current, new_synth) =
                    Self::next_token(&mut self.preorder, &mut self.replace, &mut self.append);
                self.current = new_current;
                self.current_synthetic = new_synth;
            }
            let range = synth_token.range;
            return Some((SynToken::Synthetic(synth_token), range));
        }

        let curr = self.current.clone()?;
        if !&self.range.contains_range(curr.text_range()) {
            return None;
        }
        let (new_current, new_synth) =
            Self::next_token(&mut self.preorder, &mut self.replace, &mut self.append);
        self.current = new_current;
        self.current_synthetic = new_synth;
        let token = if curr.kind().is_punct() {
            self.punct_offset = Some((curr.clone(), 0.into()));
            let range = curr.text_range();
            let range = TextRange::at(range.start(), TextSize::of('.'));
            (SynToken::Punch(curr, 0.into()), range)
        } else {
            self.punct_offset = None;
            let range = curr.text_range();
            (SynToken::Ordinary(curr), range)
        };

        Some(token)
    }

    fn peek(&self) -> Option<Self::Token> {
        if let Some((punct, mut offset)) = self.punct_offset.clone() {
            offset += TextSize::of('.');
            if usize::from(offset) < punct.text().len() {
                return Some(SynToken::Punch(punct, offset));
            }
        }

        if let Some(synth_token) = self.current_synthetic.last() {
            return Some(SynToken::Synthetic(synth_token.clone()));
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        let token = if curr.kind().is_punct() {
            SynToken::Punch(curr, 0.into())
        } else {
            SynToken::Ordinary(curr)
        };
        Some(token)
    }

    fn id_alloc(&mut self) -> &mut TokenIdAlloc {
        &mut self.id_alloc
    }
}
struct TtTreeSink<'a> {
    buf: String,
    cursor: Cursor<'a>,
    open_delims: FxHashMap<tt::TokenId, TextSize>,
    text_pos: TextSize,
    inner: SyntaxTreeBuilder,
    token_map: TokenMap,
}

impl<'a> TtTreeSink<'a> {
    fn new(cursor: Cursor<'a>) -> Self {
        TtTreeSink {
            buf: String::new(),
            cursor,
            open_delims: FxHashMap::default(),
            text_pos: 0.into(),
            inner: SyntaxTreeBuilder::default(),
            token_map: TokenMap::default(),
        }
    }

    fn finish(mut self) -> (Parse<SyntaxNode>, TokenMap) {
        self.token_map.shrink_to_fit();
        (self.inner.finish(), self.token_map)
    }
}
fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> &'static str {
    let texts = match d {
        tt::DelimiterKind::Parenthesis => "()",
        tt::DelimiterKind::Brace => "{}",
        tt::DelimiterKind::Bracket => "[]",
    };

    let idx = closing as usize;
    &texts[idx..texts.len() - (1 - idx)]
}
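
// E.g. for `Brace` the source string is "{}": `closing == false` selects
// `&texts[0..1]` ("{") and `closing == true` selects `&texts[1..2]` ("}").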
impl<'a> TtTreeSink<'a> {
    fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
        if kind == LIFETIME_IDENT {
        let mut last = self.cursor;
        for _ in 0..n_tokens {
            if self.cursor.eof() {
            let text: &str = loop {
                break match self.cursor.token_tree() {
                    Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
                        // Mark the range if needed
                        let (text, id) = match leaf {
                            tt::Leaf::Ident(ident) => (ident.text.as_str(), ident.id),
                            tt::Leaf::Punct(punct) => {
                                assert!(punct.char.is_ascii());
                                tmp = punct.char as u8;
                                (std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(), punct.id)
                            tt::Leaf::Literal(lit) => (lit.text.as_str(), lit.id),
                        let range = TextRange::at(self.text_pos, TextSize::of(text));
                        self.token_map.insert(id, range);
                        self.cursor = self.cursor.bump();
                    Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
                        self.cursor = self.cursor.subtree().unwrap();
                        match subtree.delimiter {
                                self.open_delims.insert(d.id, self.text_pos);
                                delim_to_str(d.kind, false)
                        let parent = self.cursor.end().unwrap();
                        self.cursor = self.cursor.bump();
                        match parent.delimiter {
                                if let Some(open_delim) = self.open_delims.get(&d.id) {
                                    let open_range = TextRange::at(*open_delim, TextSize::of('('));
                                        TextRange::at(self.text_pos, TextSize::of('('));
                                    self.token_map.insert_delim(d.id, open_range, close_range);
                                delim_to_str(d.kind, true)
            self.text_pos += TextSize::of(text);

        self.inner.token(kind, self.buf.as_str());

        // Add whitespace between adjacent puncts
        let next = last.bump();
            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(curr), _)),
            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(_), _)),
        ) = (last.token_tree(), next.token_tree())
            // Note: we always assume a semicolon is the last token of a statement
            // elsewhere in rust-analyzer, so we don't add whitespace after `;` here.
            if curr.spacing == tt::Spacing::Alone && curr.char != ';' {
                self.inner.token(WHITESPACE, " ");
                self.text_pos += TextSize::of(' ');
    fn start_node(&mut self, kind: SyntaxKind) {
        self.inner.start_node(kind);
    }

    fn finish_node(&mut self) {
        self.inner.finish_node();
    }

    fn error(&mut self, error: String) {
        self.inner.error(error, self.text_pos)
    }
}