From: bors[bot] <26634292+bors[bot]@users.noreply.github.com> Date: Sat, 15 Jan 2022 12:18:46 +0000 (+0000) Subject: Merge #11294 X-Git-Url: https://git.lizzy.rs/?a=commitdiff_plain;h=7a52f8370024c7f3e8e2fd8c5df785cdbe804847;hp=4c34909308e9bbf5d35882daaf0f5fb556ee7ac4;p=rust.git Merge #11294 11294: internal: Move format specifier lexing from syntax to ide_db r=Veykril a=Veykril bors r+ Co-authored-by: Lukas Wirth --- diff --git a/crates/ide/src/syntax_highlighting.rs b/crates/ide/src/syntax_highlighting.rs index f20d629fbf1..f09f291e96a 100644 --- a/crates/ide/src/syntax_highlighting.rs +++ b/crates/ide/src/syntax_highlighting.rs @@ -16,7 +16,7 @@ use ide_db::RootDatabase; use rustc_hash::FxHashMap; use syntax::{ - ast::{self, HasFormatSpecifier}, + ast::{self, IsString}, AstNode, AstToken, NodeOrToken, SyntaxKind::*, SyntaxNode, TextRange, WalkEvent, T, @@ -336,17 +336,19 @@ fn traverse( } highlight_format_string(hl, &string, &expanded_string, range); // Highlight escape sequences - if let Some(char_ranges) = string.char_ranges() { - for (piece_range, _) in char_ranges.iter().filter(|(_, char)| char.is_ok()) { - if string.text()[piece_range.start().into()..].starts_with('\\') { - hl.add(HlRange { - range: piece_range + range.start(), - highlight: HlTag::EscapeSequence.into(), - binding_hash: None, - }); - } + string.escaped_char_ranges(&mut |piece_range, char| { + if char.is_err() { + return; } - } + + if string.text()[piece_range.start().into()..].starts_with('\\') { + hl.add(HlRange { + range: piece_range + range.start(), + highlight: HlTag::EscapeSequence.into(), + binding_hash: None, + }); + } + }); } } diff --git a/crates/ide/src/syntax_highlighting/format.rs b/crates/ide/src/syntax_highlighting/format.rs index 0aa97a61020..c74b9f56db6 100644 --- a/crates/ide/src/syntax_highlighting/format.rs +++ b/crates/ide/src/syntax_highlighting/format.rs @@ -1,9 +1,9 @@ //! Syntax highlighting for format macro strings. -use ide_db::{helpers::format_string::is_format_string, SymbolKind}; -use syntax::{ - ast::{self, FormatSpecifier, HasFormatSpecifier}, - TextRange, +use ide_db::{ + helpers::format_string::{is_format_string, lex_format_specifiers, FormatSpecifier}, + SymbolKind, }; +use syntax::{ast, TextRange}; use crate::{syntax_highlighting::highlights::Highlights, HlRange, HlTag}; @@ -17,7 +17,7 @@ pub(super) fn highlight_format_string( return; } - string.lex_format_specifier(|piece_range, kind| { + lex_format_specifiers(string, &mut |piece_range, kind| { if let Some(highlight) = highlight_format_specifier(kind) { stack.add(HlRange { range: piece_range + range.start(), diff --git a/crates/ide_db/src/helpers/format_string.rs b/crates/ide_db/src/helpers/format_string.rs index c615d07250e..29f61a95e74 100644 --- a/crates/ide_db/src/helpers/format_string.rs +++ b/crates/ide_db/src/helpers/format_string.rs @@ -1,5 +1,8 @@ //! Tools to work with format string literals for the `format_args!` family of macros. -use syntax::{ast, AstNode, AstToken}; +use syntax::{ + ast::{self, IsString}, + AstNode, AstToken, TextRange, +}; pub fn is_format_string(string: &ast::String) -> bool { // Check if `string` is a format string argument of a macro invocation. @@ -10,7 +13,7 @@ pub fn is_format_string(string: &ast::String) -> bool { // // This setup lets us correctly highlight the components of `concat!("{}", "bla")` format // strings. It still fails for `concat!("{", "}")`, but that is rare. - + format!("{string} {bar}", bar = string); (|| { let macro_call = string.syntax().ancestors().find_map(ast::MacroCall::cast)?; let name = macro_call.path()?.segment()?.name_ref()?; @@ -29,3 +32,261 @@ pub fn is_format_string(string: &ast::String) -> bool { })() .is_some() } + +#[derive(Debug)] +pub enum FormatSpecifier { + Open, + Close, + Integer, + Identifier, + Colon, + Fill, + Align, + Sign, + NumberSign, + Zero, + DollarSign, + Dot, + Asterisk, + QuestionMark, +} + +pub fn lex_format_specifiers( + string: &ast::String, + mut callback: &mut dyn FnMut(TextRange, FormatSpecifier), +) { + let mut char_ranges = Vec::new(); + string.escaped_char_ranges(&mut |range, res| char_ranges.push((range, res))); + let mut chars = char_ranges + .iter() + .filter_map(|(range, res)| Some((*range, *res.as_ref().ok()?))) + .peekable(); + + while let Some((range, first_char)) = chars.next() { + if let '{' = first_char { + // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax + if let Some((_, '{')) = chars.peek() { + // Escaped format specifier, `{{` + chars.next(); + continue; + } + + callback(range, FormatSpecifier::Open); + + // check for integer/identifier + let (_, int_char) = chars.peek().copied().unwrap_or_default(); + match int_char { + // integer + '0'..='9' => read_integer(&mut chars, &mut callback), + // identifier + c if c == '_' || c.is_alphabetic() => read_identifier(&mut chars, &mut callback), + _ => {} + } + + if let Some((_, ':')) = chars.peek() { + skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback); + + // check for fill/align + let mut cloned = chars.clone().take(2); + let (_, first) = cloned.next().unwrap_or_default(); + let (_, second) = cloned.next().unwrap_or_default(); + match second { + '<' | '^' | '>' => { + // alignment specifier, first char specifies fillment + skip_char_and_emit(&mut chars, FormatSpecifier::Fill, &mut callback); + skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback); + } + _ => { + if let '<' | '^' | '>' = first { + skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback); + } + } + } + + // check for sign + match chars.peek().copied().unwrap_or_default().1 { + '+' | '-' => { + skip_char_and_emit(&mut chars, FormatSpecifier::Sign, &mut callback); + } + _ => {} + } + + // check for `#` + if let Some((_, '#')) = chars.peek() { + skip_char_and_emit(&mut chars, FormatSpecifier::NumberSign, &mut callback); + } + + // check for `0` + let mut cloned = chars.clone().take(2); + let first = cloned.next().map(|next| next.1); + let second = cloned.next().map(|next| next.1); + + if first == Some('0') && second != Some('$') { + skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback); + } + + // width + match chars.peek().copied().unwrap_or_default().1 { + '0'..='9' => { + read_integer(&mut chars, &mut callback); + if let Some((_, '$')) = chars.peek() { + skip_char_and_emit( + &mut chars, + FormatSpecifier::DollarSign, + &mut callback, + ); + } + } + c if c == '_' || c.is_alphabetic() => { + read_identifier(&mut chars, &mut callback); + + if chars.peek().map(|&(_, c)| c) == Some('?') { + skip_char_and_emit( + &mut chars, + FormatSpecifier::QuestionMark, + &mut callback, + ); + } + + // can be either width (indicated by dollar sign, or type in which case + // the next sign has to be `}`) + let next = chars.peek().map(|&(_, c)| c); + + match next { + Some('$') => skip_char_and_emit( + &mut chars, + FormatSpecifier::DollarSign, + &mut callback, + ), + Some('}') => { + skip_char_and_emit( + &mut chars, + FormatSpecifier::Close, + &mut callback, + ); + continue; + } + _ => continue, + }; + } + _ => {} + } + + // precision + if let Some((_, '.')) = chars.peek() { + skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback); + + match chars.peek().copied().unwrap_or_default().1 { + '*' => { + skip_char_and_emit( + &mut chars, + FormatSpecifier::Asterisk, + &mut callback, + ); + } + '0'..='9' => { + read_integer(&mut chars, &mut callback); + if let Some((_, '$')) = chars.peek() { + skip_char_and_emit( + &mut chars, + FormatSpecifier::DollarSign, + &mut callback, + ); + } + } + c if c == '_' || c.is_alphabetic() => { + read_identifier(&mut chars, &mut callback); + if chars.peek().map(|&(_, c)| c) != Some('$') { + continue; + } + skip_char_and_emit( + &mut chars, + FormatSpecifier::DollarSign, + &mut callback, + ); + } + _ => { + continue; + } + } + } + + // type + match chars.peek().copied().unwrap_or_default().1 { + '?' => { + skip_char_and_emit( + &mut chars, + FormatSpecifier::QuestionMark, + &mut callback, + ); + } + c if c == '_' || c.is_alphabetic() => { + read_identifier(&mut chars, &mut callback); + + if chars.peek().map(|&(_, c)| c) == Some('?') { + skip_char_and_emit( + &mut chars, + FormatSpecifier::QuestionMark, + &mut callback, + ); + } + } + _ => {} + } + } + + if let Some((_, '}')) = chars.peek() { + skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback); + } + continue; + } + } + + fn skip_char_and_emit( + chars: &mut std::iter::Peekable, + emit: FormatSpecifier, + callback: &mut F, + ) where + I: Iterator, + F: FnMut(TextRange, FormatSpecifier), + { + let (range, _) = chars.next().unwrap(); + callback(range, emit); + } + + fn read_integer(chars: &mut std::iter::Peekable, callback: &mut F) + where + I: Iterator, + F: FnMut(TextRange, FormatSpecifier), + { + let (mut range, c) = chars.next().unwrap(); + assert!(c.is_ascii_digit()); + while let Some(&(r, next_char)) = chars.peek() { + if next_char.is_ascii_digit() { + chars.next(); + range = range.cover(r); + } else { + break; + } + } + callback(range, FormatSpecifier::Integer); + } + + fn read_identifier(chars: &mut std::iter::Peekable, callback: &mut F) + where + I: Iterator, + F: FnMut(TextRange, FormatSpecifier), + { + let (mut range, c) = chars.next().unwrap(); + assert!(c.is_alphabetic() || c == '_'); + while let Some(&(r, next_char)) = chars.peek() { + if next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() { + chars.next(); + range = range.cover(r); + } else { + break; + } + } + callback(range, FormatSpecifier::Identifier); + } +} diff --git a/crates/syntax/src/ast.rs b/crates/syntax/src/ast.rs index 421120602ee..5c1aed3cd48 100644 --- a/crates/syntax/src/ast.rs +++ b/crates/syntax/src/ast.rs @@ -25,10 +25,7 @@ SlicePatComponents, StructKind, TypeBoundKind, VisibilityKind, }, operators::{ArithOp, BinaryOp, CmpOp, LogicOp, Ordering, RangeOp, UnaryOp}, - token_ext::{ - CommentKind, CommentPlacement, CommentShape, FormatSpecifier, HasFormatSpecifier, IsString, - QuoteOffsets, Radix, - }, + token_ext::{CommentKind, CommentPlacement, CommentShape, IsString, QuoteOffsets, Radix}, traits::{ DocCommentIter, HasArgList, HasAttrs, HasDocComments, HasGenericParams, HasLoopBody, HasModuleItem, HasName, HasTypeBounds, HasVisibility, diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index 3b0a1c5ddcd..16ac35b3991 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs @@ -164,6 +164,25 @@ fn open_quote_text_range(&self) -> Option { fn close_quote_text_range(&self) -> Option { self.quote_offsets().map(|it| it.quotes.1) } + fn escaped_char_ranges( + &self, + cb: &mut dyn FnMut(TextRange, Result), + ) { + let text_range_no_quotes = match self.text_range_between_quotes() { + Some(it) => it, + None => return, + }; + + let start = self.syntax().text_range().start(); + let text = &self.text()[text_range_no_quotes - start]; + let offset = text_range_no_quotes.start() - start; + + unescape_literal(text, Mode::Str, &mut |range, unescaped_char| { + let text_range = + TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap()); + cb(text_range + offset, unescaped_char); + }); + } } impl IsString for ast::String {} @@ -257,299 +276,6 @@ pub fn value(&self) -> Option> { } } -#[derive(Debug)] -pub enum FormatSpecifier { - Open, - Close, - Integer, - Identifier, - Colon, - Fill, - Align, - Sign, - NumberSign, - Zero, - DollarSign, - Dot, - Asterisk, - QuestionMark, -} - -pub trait HasFormatSpecifier: AstToken { - fn char_ranges( - &self, - ) -> Option)>>; - - fn lex_format_specifier(&self, mut callback: F) - where - F: FnMut(TextRange, FormatSpecifier), - { - let char_ranges = match self.char_ranges() { - Some(char_ranges) => char_ranges, - None => return, - }; - let mut chars = char_ranges - .iter() - .filter_map(|(range, res)| Some((*range, *res.as_ref().ok()?))) - .peekable(); - - while let Some((range, first_char)) = chars.next() { - if let '{' = first_char { - // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax - if let Some((_, '{')) = chars.peek() { - // Escaped format specifier, `{{` - chars.next(); - continue; - } - - callback(range, FormatSpecifier::Open); - - // check for integer/identifier - let (_, int_char) = chars.peek().copied().unwrap_or_default(); - match int_char { - // integer - '0'..='9' => read_integer(&mut chars, &mut callback), - // identifier - c if c == '_' || c.is_alphabetic() => { - read_identifier(&mut chars, &mut callback) - } - _ => {} - } - - if let Some((_, ':')) = chars.peek() { - skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback); - - // check for fill/align - let mut cloned = chars.clone().take(2); - let (_, first) = cloned.next().unwrap_or_default(); - let (_, second) = cloned.next().unwrap_or_default(); - match second { - '<' | '^' | '>' => { - // alignment specifier, first char specifies fillment - skip_char_and_emit(&mut chars, FormatSpecifier::Fill, &mut callback); - skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback); - } - _ => { - if let '<' | '^' | '>' = first { - skip_char_and_emit( - &mut chars, - FormatSpecifier::Align, - &mut callback, - ); - } - } - } - - // check for sign - match chars.peek().copied().unwrap_or_default().1 { - '+' | '-' => { - skip_char_and_emit(&mut chars, FormatSpecifier::Sign, &mut callback); - } - _ => {} - } - - // check for `#` - if let Some((_, '#')) = chars.peek() { - skip_char_and_emit(&mut chars, FormatSpecifier::NumberSign, &mut callback); - } - - // check for `0` - let mut cloned = chars.clone().take(2); - let first = cloned.next().map(|next| next.1); - let second = cloned.next().map(|next| next.1); - - if first == Some('0') && second != Some('$') { - skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback); - } - - // width - match chars.peek().copied().unwrap_or_default().1 { - '0'..='9' => { - read_integer(&mut chars, &mut callback); - if let Some((_, '$')) = chars.peek() { - skip_char_and_emit( - &mut chars, - FormatSpecifier::DollarSign, - &mut callback, - ); - } - } - c if c == '_' || c.is_alphabetic() => { - read_identifier(&mut chars, &mut callback); - - if chars.peek().map(|&(_, c)| c) == Some('?') { - skip_char_and_emit( - &mut chars, - FormatSpecifier::QuestionMark, - &mut callback, - ); - } - - // can be either width (indicated by dollar sign, or type in which case - // the next sign has to be `}`) - let next = chars.peek().map(|&(_, c)| c); - - match next { - Some('$') => skip_char_and_emit( - &mut chars, - FormatSpecifier::DollarSign, - &mut callback, - ), - Some('}') => { - skip_char_and_emit( - &mut chars, - FormatSpecifier::Close, - &mut callback, - ); - continue; - } - _ => continue, - }; - } - _ => {} - } - - // precision - if let Some((_, '.')) = chars.peek() { - skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback); - - match chars.peek().copied().unwrap_or_default().1 { - '*' => { - skip_char_and_emit( - &mut chars, - FormatSpecifier::Asterisk, - &mut callback, - ); - } - '0'..='9' => { - read_integer(&mut chars, &mut callback); - if let Some((_, '$')) = chars.peek() { - skip_char_and_emit( - &mut chars, - FormatSpecifier::DollarSign, - &mut callback, - ); - } - } - c if c == '_' || c.is_alphabetic() => { - read_identifier(&mut chars, &mut callback); - if chars.peek().map(|&(_, c)| c) != Some('$') { - continue; - } - skip_char_and_emit( - &mut chars, - FormatSpecifier::DollarSign, - &mut callback, - ); - } - _ => { - continue; - } - } - } - - // type - match chars.peek().copied().unwrap_or_default().1 { - '?' => { - skip_char_and_emit( - &mut chars, - FormatSpecifier::QuestionMark, - &mut callback, - ); - } - c if c == '_' || c.is_alphabetic() => { - read_identifier(&mut chars, &mut callback); - - if chars.peek().map(|&(_, c)| c) == Some('?') { - skip_char_and_emit( - &mut chars, - FormatSpecifier::QuestionMark, - &mut callback, - ); - } - } - _ => {} - } - } - - if let Some((_, '}')) = chars.peek() { - skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback); - } - continue; - } - } - - fn skip_char_and_emit( - chars: &mut std::iter::Peekable, - emit: FormatSpecifier, - callback: &mut F, - ) where - I: Iterator, - F: FnMut(TextRange, FormatSpecifier), - { - let (range, _) = chars.next().unwrap(); - callback(range, emit); - } - - fn read_integer(chars: &mut std::iter::Peekable, callback: &mut F) - where - I: Iterator, - F: FnMut(TextRange, FormatSpecifier), - { - let (mut range, c) = chars.next().unwrap(); - assert!(c.is_ascii_digit()); - while let Some(&(r, next_char)) = chars.peek() { - if next_char.is_ascii_digit() { - chars.next(); - range = range.cover(r); - } else { - break; - } - } - callback(range, FormatSpecifier::Integer); - } - - fn read_identifier(chars: &mut std::iter::Peekable, callback: &mut F) - where - I: Iterator, - F: FnMut(TextRange, FormatSpecifier), - { - let (mut range, c) = chars.next().unwrap(); - assert!(c.is_alphabetic() || c == '_'); - while let Some(&(r, next_char)) = chars.peek() { - if next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() { - chars.next(); - range = range.cover(r); - } else { - break; - } - } - callback(range, FormatSpecifier::Identifier); - } - } -} - -impl HasFormatSpecifier for ast::String { - fn char_ranges( - &self, - ) -> Option)>> { - let text = self.text(); - let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; - let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start(); - - let mut res = Vec::with_capacity(text.len()); - unescape_literal(text, Mode::Str, &mut |range, unescaped_char| { - res.push(( - TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap()) - + offset, - unescaped_char, - )); - }); - - Some(res) - } -} - impl ast::IntNumber { pub fn radix(&self) -> Radix { match self.text().get(..2).unwrap_or_default() {