X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=crates%2Fmbe%2Fsrc%2Fsyntax_bridge.rs;h=d3489813e175e6742405f63840b3d978fd14d538;hb=c3601e9860e533c7990d90dbd773a49039bb037e;hp=39129b03056dca174224829daea1a6d26cb70110;hpb=d0d05075ed52aa22dfec36b5a7b23e6a1a554496;p=rust.git

diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs
index 39129b03056..d3489813e17 100644
--- a/crates/mbe/src/syntax_bridge.rs
+++ b/crates/mbe/src/syntax_bridge.rs
@@ -1,6 +1,7 @@
 //! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].
 
 use rustc_hash::{FxHashMap, FxHashSet};
+use stdx::{always, non_empty_vec::NonEmptyVec};
 use syntax::{
     ast::{self, make::tokens::doc_comment},
     AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, SyntaxKind,
@@ -9,29 +10,33 @@
 };
 use tt::buffer::{Cursor, TokenBuffer};
 
-use crate::{
-    to_parser_tokens::to_parser_tokens, tt_iter::TtIter, ExpandError, ParserEntryPoint, TokenMap,
-};
+use crate::{to_parser_input::to_parser_input, tt_iter::TtIter, TokenMap};
 
 /// Convert the syntax node to a `TokenTree` (what a macro
 /// will consume).
 pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) {
-    syntax_node_to_token_tree_censored(node, &Default::default())
+    syntax_node_to_token_tree_censored(node, Default::default(), Default::default())
 }
 
+// TODO rename
 /// Convert the syntax node to a `TokenTree` (what a macro will consume)
 /// with the censored range excluded.
 pub fn syntax_node_to_token_tree_censored(
     node: &SyntaxNode,
-    censor: &FxHashSet<SyntaxNode>,
+    replace: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
+    append: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
 ) -> (tt::Subtree, TokenMap) {
     let global_offset = node.text_range().start();
-    let mut c = Convertor::new(node, global_offset, censor);
+    let mut c = Convertor::new(node, global_offset, replace, append);
     let subtree = convert_tokens(&mut c);
     c.id_alloc.map.shrink_to_fit();
+    always!(c.replace.is_empty());
+    always!(c.append.is_empty());
     (subtree, c.id_alloc.map)
 }
 
+pub type SyntheticToken = (SyntaxKind, SmolStr);
+
 // The following items are what a `rustc` macro can be parsed into:
 // link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
 // * Expr(P<ast::Expr>) -> token_tree_to_expr
@@ -46,33 +51,29 @@ pub fn syntax_node_to_token_tree_censored(
 
 pub fn token_tree_to_syntax_node(
     tt: &tt::Subtree,
-    entry_point: ParserEntryPoint,
-) -> Result<(Parse<SyntaxNode>, TokenMap), ExpandError> {
+    entry_point: parser::TopEntryPoint,
+) -> (Parse<SyntaxNode>, TokenMap) {
     let buffer = match tt {
         tt::Subtree { delimiter: None, token_trees } => {
             TokenBuffer::from_tokens(token_trees.as_slice())
         }
         _ => TokenBuffer::from_subtree(tt),
     };
-    let parser_tokens = to_parser_tokens(&buffer);
-    let tree_traversal = parser::parse(&parser_tokens, entry_point);
+    let parser_input = to_parser_input(&buffer);
+    let parser_output = entry_point.parse(&parser_input);
     let mut tree_sink = TtTreeSink::new(buffer.begin());
-    for event in tree_traversal.iter() {
+    for event in parser_output.iter() {
         match event {
-            parser::TraversalStep::Token { kind, n_raw_tokens } => {
+            parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => {
                 tree_sink.token(kind, n_raw_tokens)
             }
-            parser::TraversalStep::EnterNode { kind } => tree_sink.start_node(kind),
-            parser::TraversalStep::LeaveNode => tree_sink.finish_node(),
-            parser::TraversalStep::Error { msg } => tree_sink.error(msg.to_string()),
+            parser::Step::Enter { kind } => tree_sink.start_node(kind),
+            parser::Step::Exit => tree_sink.finish_node(),
+            parser::Step::Error { msg } => tree_sink.error(msg.to_string()),
         }
     }
 
-    if tree_sink.roots.len() != 1 {
-        return Err(ExpandError::ConversionError);
-    }
-
     //FIXME: would be cool to report errors
     let (parse, range_map) = tree_sink.finish();
-    Ok((parse, range_map))
+    (parse, range_map)
 }
 
 /// Convert a string to a `TokenTree`
@@ -83,7 +84,7 @@ pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
     }
 
     let mut conv = RawConvertor {
-        lexed: lexed,
+        lexed,
         pos: 0,
         id_alloc: TokenIdAlloc {
             map: Default::default(),
@@ -106,7 +107,7 @@ pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> {
     let mut res = Vec::new();
 
     while iter.peek_n(0).is_some() {
-        let expanded = iter.expect_fragment(ParserEntryPoint::Expr);
+        let expanded = iter.expect_fragment(parser::PrefixEntryPoint::Expr);
 
         res.push(match expanded.value {
             None => break,
@@ -143,25 +144,26 @@ struct StackEntry {
         idx: !0,
         open_range: TextRange::empty(TextSize::of('.')),
     };
-    let mut stack = vec![entry];
+    let mut stack = NonEmptyVec::new(entry);
 
     loop {
-        let entry = stack.last_mut().unwrap();
-        let result = &mut entry.subtree.token_trees;
+        let StackEntry { subtree, .. } = stack.last_mut();
+        let result = &mut subtree.token_trees;
         let (token, range) = match conv.bump() {
-            None => break,
             Some(it) => it,
+            None => break,
         };
 
-        let k: SyntaxKind = token.kind(&conv);
-        if k == COMMENT {
+        let kind = token.kind(&conv);
+        if kind == COMMENT {
             if let Some(tokens) = conv.convert_doc_comment(&token) {
                 // FIXME: There has to be a better way to do this
                 // Add the comment's token id to the converted doc string
                 let id = conv.id_alloc().alloc(range);
                 result.extend(tokens.into_iter().map(|mut tt| {
                     if let tt::TokenTree::Subtree(sub) = &mut tt {
-                        if let tt::TokenTree::Leaf(tt::Leaf::Literal(lit)) = &mut sub.token_trees[2]
+                        if let Some(tt::TokenTree::Leaf(tt::Leaf::Literal(lit))) =
+                            sub.token_trees.get_mut(2)
                         {
                             lit.id = id
                         }
@@ -171,26 +173,26 @@ struct StackEntry {
             }
             continue;
         }
-
-        result.push(if k.is_punct() && k != UNDERSCORE {
+        let tt = if kind.is_punct() && kind != UNDERSCORE {
             assert_eq!(range.len(), TextSize::of('.'));
-            if let Some(delim) = entry.subtree.delimiter {
+
+            if let Some(delim) = subtree.delimiter {
                 let expected = match delim.kind {
                     tt::DelimiterKind::Parenthesis => T![')'],
                     tt::DelimiterKind::Brace => T!['}'],
                     tt::DelimiterKind::Bracket => T![']'],
                 };
 
-                if k == expected {
-                    let entry = stack.pop().unwrap();
-                    conv.id_alloc().close_delim(entry.idx, Some(range));
-                    stack.last_mut().unwrap().subtree.token_trees.push(entry.subtree.into());
+                if kind == expected {
+                    if let Some(entry) = stack.pop() {
+                        conv.id_alloc().close_delim(entry.idx, Some(range));
+                        stack.last_mut().subtree.token_trees.push(entry.subtree.into());
+                    }
                     continue;
                 }
             }
 
-            let delim = match k {
+            let delim = match kind {
                 T!['('] => Some(tt::DelimiterKind::Parenthesis),
                 T!['{'] => Some(tt::DelimiterKind::Brace),
                 T!['['] => Some(tt::DelimiterKind::Bracket),
@@ -203,36 +205,35 @@ struct StackEntry {
                 subtree.delimiter = Some(tt::Delimiter { id, kind });
                 stack.push(StackEntry { subtree, idx, open_range: range });
                 continue;
-            } else {
-                let spacing = match conv.peek() {
-                    Some(next)
-                        if next.kind(&conv).is_trivia()
-                            || next.kind(&conv) == T!['[']
-                            || next.kind(&conv) == T!['{']
-                            || next.kind(&conv) == T!['('] =>
-                    {
-                        tt::Spacing::Alone
-                    }
-                    Some(next) if next.kind(&conv).is_punct() && next.kind(&conv) != UNDERSCORE => {
-                        tt::Spacing::Joint
-                    }
-                    _ => tt::Spacing::Alone,
-                };
-                let char = match token.to_char(&conv) {
-                    Some(c) => c,
-                    None => {
-                        panic!("Token from lexer must be single char: token = {:#?}", token);
-                    }
-                };
-                tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range) }).into()
             }
+
+            let spacing = match conv.peek().map(|next| next.kind(&conv)) {
+                Some(kind)
+                    if !kind.is_trivia()
+                        && kind.is_punct()
+                        && kind != T!['[']
+                        && kind != T!['{']
+                        && kind != T!['(']
+                        && kind != UNDERSCORE =>
+                {
+                    tt::Spacing::Joint
+                }
+                _ => tt::Spacing::Alone,
+            };
+            let char = match token.to_char(&conv) {
+                Some(c) => c,
+                None => {
+                    panic!("Token from lexer must be single char: token = {:#?}", token);
+                }
+            };
+            tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range) }).into()
         } else {
             macro_rules! make_leaf {
                 ($i:ident) => {
                     tt::$i { id: conv.id_alloc().alloc(range), text: token.to_text(conv) }.into()
                 };
             }
-            let leaf: tt::Leaf = match k {
+            let leaf: tt::Leaf = match kind {
                 T![true] | T![false] => make_leaf!(Ident),
                 IDENT => make_leaf!(Ident),
                 UNDERSCORE => make_leaf!(Ident),
@@ -260,15 +261,15 @@ macro_rules! make_leaf {
             };
 
             leaf.into()
-        });
+        };
+        result.push(tt);
     }
 
     // If we get here, we've consumed all input tokens.
     // We might have more than one subtree in the stack, if the delimiters are improperly balanced.
     // Merge them so we're left with one.
-    while stack.len() > 1 {
-        let entry = stack.pop().unwrap();
-        let parent = stack.last_mut().unwrap();
+    while let Some(entry) = stack.pop() {
+        let parent = stack.last_mut();
 
         conv.id_alloc().close_delim(entry.idx, None);
         let leaf: tt::Leaf = tt::Punct {
@@ -285,13 +286,12 @@ macro_rules! make_leaf {
         parent.subtree.token_trees.extend(entry.subtree.token_trees);
     }
 
-    let subtree = stack.pop().unwrap().subtree;
-    if subtree.token_trees.len() == 1 {
-        if let tt::TokenTree::Subtree(first) = &subtree.token_trees[0] {
-            return first.clone();
-        }
+    let subtree = stack.into_last().subtree;
+    if let [tt::TokenTree::Subtree(first)] = &*subtree.token_trees {
+        first.clone()
+    } else {
+        subtree
     }
-    subtree
 }
 
 /// Returns the textual content of a doc comment block as a quoted string
@@ -322,7 +322,8 @@ fn convert_doc_comment(token: &syntax::SyntaxToken) -> Option<Vec<tt::TokenTree>> {
     let meta_tkns = vec![mk_ident("doc"), mk_punct('='), mk_doc_literal(&comment)];
 
     // Make `#![]`
-    let mut token_trees = vec![mk_punct('#')];
+    let mut token_trees = Vec::with_capacity(3);
+    token_trees.push(mk_punct('#'));
     if let ast::CommentPlacement::Inner = doc {
         token_trees.push(mk_punct('!'));
     }
@@ -397,7 +398,7 @@ fn close_delim(&mut self, idx: usize, close_abs_range: Option<TextRange>) {
     }
 }
 
-/// A Raw Token (straightly from lexer) convertor
+/// A raw token (straight from lexer) convertor
 struct RawConvertor<'a> {
     lexed: parser::LexedStr<'a>,
     pos: usize,
@@ -441,8 +442,8 @@ fn to_text(&self, ctx: &RawConvertor<'_>) -> SmolStr {
 impl<'a> TokenConvertor for RawConvertor<'a> {
     type Token = usize;
 
-    fn convert_doc_comment(&self, token: &usize) -> Option<Vec<tt::TokenTree>> {
-        let text = self.lexed.text(*token);
+    fn convert_doc_comment(&self, &token: &usize) -> Option<Vec<tt::TokenTree>> {
+        let text = self.lexed.text(token);
         convert_doc_comment(&doc_comment(text))
     }
 
@@ -470,87 +471,122 @@ fn id_alloc(&mut self) -> &mut TokenIdAlloc {
     }
 }
 
-struct Convertor<'c> {
+struct Convertor {
     id_alloc: TokenIdAlloc,
     current: Option<SyntaxToken>,
+    current_synthetic: Vec<SyntheticToken>,
     preorder: PreorderWithTokens,
-    censor: &'c FxHashSet<SyntaxNode>,
+    replace: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
+    append: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
     range: TextRange,
     punct_offset: Option<(SyntaxToken, TextSize)>,
 }
 
-impl<'c> Convertor<'c> {
+impl Convertor {
     fn new(
         node: &SyntaxNode,
         global_offset: TextSize,
-        censor: &'c FxHashSet<SyntaxNode>,
-    ) -> Convertor<'c> {
+        mut replace: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
+        mut append: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
+    ) -> Convertor {
         let range = node.text_range();
         let mut preorder = node.preorder_with_tokens();
-        let first = Self::next_token(&mut preorder, censor);
+        let (first, synthetic) = Self::next_token(&mut preorder, &mut replace, &mut append);
         Convertor {
             id_alloc: { TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 } },
             current: first,
+            current_synthetic: synthetic,
             preorder,
             range,
-            censor,
+            replace,
+            append,
             punct_offset: None,
         }
     }
 
     fn next_token(
         preorder: &mut PreorderWithTokens,
-        censor: &FxHashSet<SyntaxNode>,
-    ) -> Option<SyntaxToken> {
+        replace: &mut FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
+        append: &mut FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
+    ) -> (Option<SyntaxToken>, Vec<SyntheticToken>) {
         while let Some(ev) = preorder.next() {
             let ele = match ev {
                 WalkEvent::Enter(ele) => ele,
+                WalkEvent::Leave(SyntaxElement::Node(node)) => {
+                    if let Some(mut v) = append.remove(&node) {
+                        eprintln!("after {:?}, appending {:?}", node, v);
+                        if !v.is_empty() {
+                            v.reverse();
+                            return (None, v);
+                        }
+                    }
+                    continue;
+                }
                 _ => continue,
            };
             match ele {
-                SyntaxElement::Token(t) => return Some(t),
-                SyntaxElement::Node(node) if censor.contains(&node) => preorder.skip_subtree(),
-                SyntaxElement::Node(_) => (),
+                SyntaxElement::Token(t) => return (Some(t), Vec::new()),
+                SyntaxElement::Node(node) => {
+                    if let Some(mut v) = replace.remove(&node) {
+                        preorder.skip_subtree();
+                        eprintln!("replacing {:?} by {:?}", node, v);
+                        if !v.is_empty() {
+                            v.reverse();
+                            return (None, v);
+                        }
+                    }
+                }
             }
         }
-        None
+        (None, Vec::new())
     }
 }
 
 #[derive(Debug)]
 enum SynToken {
     Ordinary(SyntaxToken),
+    // FIXME is this supposed to be `Punct`?
     Punch(SyntaxToken, TextSize),
+    Synthetic(SyntheticToken),
 }
 
 impl SynToken {
-    fn token(&self) -> &SyntaxToken {
+    fn token(&self) -> Option<&SyntaxToken> {
         match self {
-            SynToken::Ordinary(it) => it,
-            SynToken::Punch(it, _) => it,
+            SynToken::Ordinary(it) | SynToken::Punch(it, _) => Some(it),
+            SynToken::Synthetic(_) => None,
         }
     }
 }
 
-impl<'a> SrcToken<Convertor<'a>> for SynToken {
-    fn kind(&self, _ctx: &Convertor<'a>) -> SyntaxKind {
-        self.token().kind()
+impl SrcToken<Convertor> for SynToken {
+    fn kind(&self, _ctx: &Convertor) -> SyntaxKind {
+        match self {
+            SynToken::Ordinary(token) => token.kind(),
+            SynToken::Punch(token, _) => token.kind(),
+            SynToken::Synthetic((kind, _)) => *kind,
+        }
     }
 
-    fn to_char(&self, _ctx: &Convertor<'a>) -> Option<char> {
+    fn to_char(&self, _ctx: &Convertor) -> Option<char> {
         match self {
             SynToken::Ordinary(_) => None,
             SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
+            SynToken::Synthetic(_) => None,
         }
     }
-    fn to_text(&self, _ctx: &Convertor<'a>) -> SmolStr {
-        self.token().text().into()
+    fn to_text(&self, _ctx: &Convertor) -> SmolStr {
+        match self {
+            SynToken::Ordinary(token) => token.text().into(),
+            SynToken::Punch(token, _) => token.text().into(),
+            SynToken::Synthetic((_, text)) => text.clone(),
+        }
     }
 }
 
-impl TokenConvertor for Convertor<'_> {
+impl TokenConvertor for Convertor {
     type Token = SynToken;
 
     fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>> {
-        convert_doc_comment(token.token())
+        convert_doc_comment(token.token()?)
     }
 
     fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
@@ -564,15 +600,29 @@ fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
             }
         }
 
+        if let Some(synth_token) = self.current_synthetic.pop() {
+            if self.current_synthetic.is_empty() {
+                let (new_current, new_synth) =
+                    Self::next_token(&mut self.preorder, &mut self.replace, &mut self.append);
+                self.current = new_current;
+                self.current_synthetic = new_synth;
+            }
+            // TODO fix range?
+            return Some((SynToken::Synthetic(synth_token), self.range));
+        }
+
         let curr = self.current.clone()?;
         if !&self.range.contains_range(curr.text_range()) {
             return None;
         }
-        self.current = Self::next_token(&mut self.preorder, self.censor);
+        let (new_current, new_synth) =
+            Self::next_token(&mut self.preorder, &mut self.replace, &mut self.append);
+        self.current = new_current;
+        self.current_synthetic = new_synth;
 
         let token = if curr.kind().is_punct() {
+            self.punct_offset = Some((curr.clone(), 0.into()));
             let range = curr.text_range();
             let range = TextRange::at(range.start(), TextSize::of('.'));
-            self.punct_offset = Some((curr.clone(), 0.into()));
             (SynToken::Punch(curr, 0.into()), range)
         } else {
             self.punct_offset = None;
@@ -591,6 +641,11 @@ fn peek(&self) -> Option<Self::Token> {
             }
         }
 
+        if let Some(synth_token) = self.current_synthetic.last() {
+            // TODO fix range?
+            return Some(SynToken::Synthetic(synth_token.clone()));
+        }
+
         let curr = self.current.clone()?;
         if !self.range.contains_range(curr.text_range()) {
             return None;
@@ -616,10 +671,6 @@ struct TtTreeSink<'a> {
     text_pos: TextSize,
     inner: SyntaxTreeBuilder,
     token_map: TokenMap,
-
-    // Number of roots
-    // Use for detect ill-form tree which is not single root
-    roots: smallvec::SmallVec<[usize; 1]>,
 }
 
 impl<'a> TtTreeSink<'a> {
@@ -630,7 +681,6 @@ fn new(cursor: Cursor<'a>) -> Self {
             open_delims: FxHashMap::default(),
             text_pos: 0.into(),
             inner: SyntaxTreeBuilder::default(),
-            roots: smallvec::SmallVec::new(),
             token_map: TokenMap::default(),
         }
     }
@@ -660,7 +710,7 @@ fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
 
         let mut last = self.cursor;
         for _ in 0..n_tokens {
-            let tmp_str: SmolStr;
+            let tmp: u8;
             if self.cursor.eof() {
                 break;
             }
@@ -670,18 +720,15 @@ fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
                 Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
                     // Mark the range if needed
                     let (text, id) = match leaf {
-                        tt::Leaf::Ident(ident) => (&ident.text, ident.id),
+                        tt::Leaf::Ident(ident) => (ident.text.as_str(), ident.id),
                         tt::Leaf::Punct(punct) => {
                             assert!(punct.char.is_ascii());
-                            let char = &(punct.char as u8);
-                            tmp_str = SmolStr::new_inline(
-                                std::str::from_utf8(std::slice::from_ref(char)).unwrap(),
-                            );
-                            (&tmp_str, punct.id)
+                            tmp = punct.char as u8;
+                            (std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(), punct.id)
                         }
-                        tt::Leaf::Literal(lit) => (&lit.text, lit.id),
+                        tt::Leaf::Literal(lit) => (lit.text.as_str(), lit.id),
                     };
-                    let range = TextRange::at(self.text_pos, TextSize::of(text.as_str()));
+                    let range = TextRange::at(self.text_pos, TextSize::of(text));
                     self.token_map.insert(id, range);
                     self.cursor = self.cursor.bump();
                     text
@@ -738,16 +785,10 @@ fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
 
     fn start_node(&mut self, kind: SyntaxKind) {
         self.inner.start_node(kind);
-
-        match self.roots.last_mut() {
-            None | Some(0) => self.roots.push(1),
-            Some(ref mut n) => **n += 1,
-        };
     }
 
     fn finish_node(&mut self) {
         self.inner.finish_node();
-        *self.roots.last_mut().unwrap() -= 1;
     }
 
     fn error(&mut self, error: String) {
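
Usage note: below is a minimal sketch (not part of the diff) of how a caller inside the `mbe` crate might drive the reworked entry point. The `replace`/`append` maps keyed by `SyntaxNode` and the `SyntheticToken = (SyntaxKind, SmolStr)` alias come from the diff above; the parsed text, the choice of censored node, and the appended `;` token are illustrative assumptions, not code from this commit.

    use rustc_hash::FxHashMap;
    use syntax::{AstNode, SmolStr, SourceFile, SyntaxKind, SyntaxNode};

    use crate::syntax_bridge::{
        syntax_node_to_token_tree, syntax_node_to_token_tree_censored, SyntheticToken,
    };

    fn censor_example() {
        // Parse some source; the root `SyntaxNode` is what the bridge converts.
        let file: SourceFile = SourceFile::parse("struct S; struct T;").tree();
        let root: SyntaxNode = file.syntax().clone();

        // Plain conversion: same behaviour as before this change.
        let (_subtree, _map) = syntax_node_to_token_tree(&root);

        // Censor the first child item by replacing it with no tokens, and
        // append a synthetic `;` once traversal leaves the root node.
        let mut replace: FxHashMap<SyntaxNode, Vec<SyntheticToken>> = FxHashMap::default();
        let mut append: FxHashMap<SyntaxNode, Vec<SyntheticToken>> = FxHashMap::default();
        if let Some(first_item) = root.first_child() {
            replace.insert(first_item, Vec::new());
        }
        append.insert(root.clone(), vec![(SyntaxKind::SEMICOLON, SmolStr::new(";"))]);

        let (_censored_subtree, _censored_map) =
            syntax_node_to_token_tree_censored(&root, replace, append);
    }

Passing the maps by value matches the new signature: `next_token` drains entries with `remove` as it walks the tree, which is exactly what the `always!(c.replace.is_empty())` and `always!(c.append.is_empty())` assertions near the top of the diff verify after conversion.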