11294: internal: Move format specifier lexing from syntax to ide_db r=Veykril a=Veykril
bors r+
Co-authored-by: Lukas Wirth <lukastw97@gmail.com>
use ide_db::RootDatabase;
use rustc_hash::FxHashMap;
use syntax::{
- ast::{self, HasFormatSpecifier},
+ ast::{self, IsString},
AstNode, AstToken, NodeOrToken,
SyntaxKind::*,
SyntaxNode, TextRange, WalkEvent, T,
}
highlight_format_string(hl, &string, &expanded_string, range);
// Highlight escape sequences
- if let Some(char_ranges) = string.char_ranges() {
- for (piece_range, _) in char_ranges.iter().filter(|(_, char)| char.is_ok()) {
- if string.text()[piece_range.start().into()..].starts_with('\\') {
- hl.add(HlRange {
- range: piece_range + range.start(),
- highlight: HlTag::EscapeSequence.into(),
- binding_hash: None,
- });
- }
+ string.escaped_char_ranges(&mut |piece_range, char| {
+ if char.is_err() {
+ return;
}
- }
+
+ if string.text()[piece_range.start().into()..].starts_with('\\') {
+ hl.add(HlRange {
+ range: piece_range + range.start(),
+ highlight: HlTag::EscapeSequence.into(),
+ binding_hash: None,
+ });
+ }
+ });
}
}
//! Syntax highlighting for format macro strings.
-use ide_db::{helpers::format_string::is_format_string, SymbolKind};
-use syntax::{
- ast::{self, FormatSpecifier, HasFormatSpecifier},
- TextRange,
+use ide_db::{
+ helpers::format_string::{is_format_string, lex_format_specifiers, FormatSpecifier},
+ SymbolKind,
};
+use syntax::{ast, TextRange};
use crate::{syntax_highlighting::highlights::Highlights, HlRange, HlTag};
return;
}
- string.lex_format_specifier(|piece_range, kind| {
+ lex_format_specifiers(string, &mut |piece_range, kind| {
if let Some(highlight) = highlight_format_specifier(kind) {
stack.add(HlRange {
range: piece_range + range.start(),
//! Tools to work with format string literals for the `format_args!` family of macros.
-use syntax::{ast, AstNode, AstToken};
+use syntax::{
+ ast::{self, IsString},
+ AstNode, AstToken, TextRange,
+};
pub fn is_format_string(string: &ast::String) -> bool {
// Check if `string` is a format string argument of a macro invocation.
//
// This setup lets us correctly highlight the components of `concat!("{}", "bla")` format
// strings. It still fails for `concat!("{", "}")`, but that is rare.
-
+ format!("{string} {bar}", bar = string);
(|| {
let macro_call = string.syntax().ancestors().find_map(ast::MacroCall::cast)?;
let name = macro_call.path()?.segment()?.name_ref()?;
})()
.is_some()
}
+/// Kind of a single piece reported to the callback of `lex_format_specifiers`.
+#[derive(Debug)]
+pub enum FormatSpecifier {
+ Open, // the `{` that starts a format specifier
+ Close, // the closing `}`
+ Integer, // a run of ASCII digits
+ Identifier, // `_` or an alphabetic char, followed by `_`/alphabetic/ASCII-digit chars
+ Colon, // the `:` after the optional leading integer/identifier
+ Fill, // the char directly preceding an alignment char
+ Align, // `<`, `^` or `>`
+ Sign, // `+` or `-`
+ NumberSign, // `#`
+ Zero, // a `0` that is not directly followed by `$`
+ DollarSign, // `$` following a width/precision integer or identifier
+ Dot, // the `.` that introduces the precision
+ Asterisk, // `*` directly after the `.`
+ QuestionMark, // `?`
+}
+/// Lexes the `{...}` format specifiers of `string`, reporting the range and kind of each piece to `callback`.
+pub fn lex_format_specifiers(
+ string: &ast::String,
+ mut callback: &mut dyn FnMut(TextRange, FormatSpecifier),
+) {
+ let mut char_ranges = Vec::new(); // every (range, unescape result) pair reported for the literal
+ string.escaped_char_ranges(&mut |range, res| char_ranges.push((range, res)));
+ let mut chars = char_ranges
+ .iter()
+ .filter_map(|(range, res)| Some((*range, *res.as_ref().ok()?))) // drop pieces that failed to unescape
+ .peekable();
+
+ while let Some((range, first_char)) = chars.next() {
+ if let '{' = first_char {
+ // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
+ if let Some((_, '{')) = chars.peek() {
+ // Escaped format specifier, `{{`: consume the second brace and emit nothing
+ chars.next();
+ continue;
+ }
+
+ callback(range, FormatSpecifier::Open);
+
+ // check for an integer or identifier directly after the `{`
+ let (_, int_char) = chars.peek().copied().unwrap_or_default(); // `'\0'` (the `char` default) if the input ends here
+ match int_char {
+ // integer
+ '0'..='9' => read_integer(&mut chars, &mut callback),
+ // identifier
+ c if c == '_' || c.is_alphabetic() => read_identifier(&mut chars, &mut callback),
+ _ => {}
+ }
+
+ if let Some((_, ':')) = chars.peek() {
+ skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
+
+ // check for fill/align
+ let mut cloned = chars.clone().take(2); // look two chars ahead without consuming them
+ let (_, first) = cloned.next().unwrap_or_default();
+ let (_, second) = cloned.next().unwrap_or_default();
+ match second {
+ '<' | '^' | '>' => {
+ // alignment specifier, first char specifies the fill character
+ skip_char_and_emit(&mut chars, FormatSpecifier::Fill, &mut callback);
+ skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback);
+ }
+ _ => {
+ if let '<' | '^' | '>' = first { // alignment without a preceding fill character
+ skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback);
+ }
+ }
+ }
+
+ // check for sign
+ match chars.peek().copied().unwrap_or_default().1 {
+ '+' | '-' => {
+ skip_char_and_emit(&mut chars, FormatSpecifier::Sign, &mut callback);
+ }
+ _ => {}
+ }
+
+ // check for `#`
+ if let Some((_, '#')) = chars.peek() {
+ skip_char_and_emit(&mut chars, FormatSpecifier::NumberSign, &mut callback);
+ }
+
+ // check for `0`; a `0` directly followed by `$` is left for the width branch below
+ let mut cloned = chars.clone().take(2);
+ let first = cloned.next().map(|next| next.1);
+ let second = cloned.next().map(|next| next.1);
+
+ if first == Some('0') && second != Some('$') {
+ skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
+ }
+
+ // width
+ match chars.peek().copied().unwrap_or_default().1 {
+ '0'..='9' => {
+ read_integer(&mut chars, &mut callback);
+ if let Some((_, '$')) = chars.peek() {
+ skip_char_and_emit(
+ &mut chars,
+ FormatSpecifier::DollarSign,
+ &mut callback,
+ );
+ }
+ }
+ c if c == '_' || c.is_alphabetic() => {
+ read_identifier(&mut chars, &mut callback);
+
+ if chars.peek().map(|&(_, c)| c) == Some('?') {
+ skip_char_and_emit(
+ &mut chars,
+ FormatSpecifier::QuestionMark,
+ &mut callback,
+ );
+ }
+
+ // can be either width (indicated by dollar sign, or type in which case
+ // the next sign has to be `}`)
+ let next = chars.peek().map(|&(_, c)| c);
+
+ match next {
+ Some('$') => skip_char_and_emit(
+ &mut chars,
+ FormatSpecifier::DollarSign,
+ &mut callback,
+ ),
+ Some('}') => {
+ skip_char_and_emit(
+ &mut chars,
+ FormatSpecifier::Close,
+ &mut callback,
+ );
+ continue; // specifier finished, resume scanning for the next `{`
+ }
+ _ => continue, // anything else: stop lexing this specifier
+ };
+ }
+ _ => {}
+ }
+
+ // precision
+ if let Some((_, '.')) = chars.peek() {
+ skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
+
+ match chars.peek().copied().unwrap_or_default().1 {
+ '*' => {
+ skip_char_and_emit(
+ &mut chars,
+ FormatSpecifier::Asterisk,
+ &mut callback,
+ );
+ }
+ '0'..='9' => {
+ read_integer(&mut chars, &mut callback);
+ if let Some((_, '$')) = chars.peek() {
+ skip_char_and_emit(
+ &mut chars,
+ FormatSpecifier::DollarSign,
+ &mut callback,
+ );
+ }
+ }
+ c if c == '_' || c.is_alphabetic() => {
+ read_identifier(&mut chars, &mut callback);
+ if chars.peek().map(|&(_, c)| c) != Some('$') {
+ continue; // an identifier here must be followed by `$`; otherwise stop lexing this specifier
+ }
+ skip_char_and_emit(
+ &mut chars,
+ FormatSpecifier::DollarSign,
+ &mut callback,
+ );
+ }
+ _ => {
+ continue; // nothing recognised after the `.`; stop lexing this specifier
+ }
+ }
+ }
+
+ // type
+ match chars.peek().copied().unwrap_or_default().1 {
+ '?' => {
+ skip_char_and_emit(
+ &mut chars,
+ FormatSpecifier::QuestionMark,
+ &mut callback,
+ );
+ }
+ c if c == '_' || c.is_alphabetic() => {
+ read_identifier(&mut chars, &mut callback);
+
+ if chars.peek().map(|&(_, c)| c) == Some('?') {
+ skip_char_and_emit(
+ &mut chars,
+ FormatSpecifier::QuestionMark,
+ &mut callback,
+ );
+ }
+ }
+ _ => {}
+ }
+ }
+
+ if let Some((_, '}')) = chars.peek() { // emit the closing brace only if it is the next char
+ skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
+ }
+ continue;
+ }
+ }
+ // Consumes the next char and reports its range to `callback` as `emit`; panics if `chars` is exhausted.
+ fn skip_char_and_emit<I, F>(
+ chars: &mut std::iter::Peekable<I>,
+ emit: FormatSpecifier,
+ callback: &mut F,
+ ) where
+ I: Iterator<Item = (TextRange, char)>,
+ F: FnMut(TextRange, FormatSpecifier),
+ {
+ let (range, _) = chars.next().unwrap();
+ callback(range, emit);
+ }
+ // Consumes a run of ASCII digits and reports the covering range as `Integer`; the first char must be a digit.
+ fn read_integer<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
+ where
+ I: Iterator<Item = (TextRange, char)>,
+ F: FnMut(TextRange, FormatSpecifier),
+ {
+ let (mut range, c) = chars.next().unwrap();
+ assert!(c.is_ascii_digit());
+ while let Some(&(r, next_char)) = chars.peek() {
+ if next_char.is_ascii_digit() {
+ chars.next();
+ range = range.cover(r); // grow the reported range to include this digit
+ } else {
+ break;
+ }
+ }
+ callback(range, FormatSpecifier::Integer);
+ }
+ // Consumes `_`/alphabetic/ASCII-digit chars and reports the covering range as `Identifier`; the first char must be `_` or alphabetic.
+ fn read_identifier<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
+ where
+ I: Iterator<Item = (TextRange, char)>,
+ F: FnMut(TextRange, FormatSpecifier),
+ {
+ let (mut range, c) = chars.next().unwrap();
+ assert!(c.is_alphabetic() || c == '_');
+ while let Some(&(r, next_char)) = chars.peek() {
+ if next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
+ chars.next();
+ range = range.cover(r); // grow the reported range to include this char
+ } else {
+ break;
+ }
+ }
+ callback(range, FormatSpecifier::Identifier);
+ }
+}
SlicePatComponents, StructKind, TypeBoundKind, VisibilityKind,
},
operators::{ArithOp, BinaryOp, CmpOp, LogicOp, Ordering, RangeOp, UnaryOp},
- token_ext::{
- CommentKind, CommentPlacement, CommentShape, FormatSpecifier, HasFormatSpecifier, IsString,
- QuoteOffsets, Radix,
- },
+ token_ext::{CommentKind, CommentPlacement, CommentShape, IsString, QuoteOffsets, Radix},
traits::{
DocCommentIter, HasArgList, HasAttrs, HasDocComments, HasGenericParams, HasLoopBody,
HasModuleItem, HasName, HasTypeBounds, HasVisibility,
fn close_quote_text_range(&self) -> Option<TextRange> {
self.quote_offsets().map(|it| it.quotes.1) // second range of the `quotes` pair; `None` when `quote_offsets()` is `None`
}
+ /// Unescapes the text between the quotes and calls `cb` once per piece with the piece's
+ /// range (relative to the start of this token) and its unescape result.
+ ///
+ /// Does nothing when `text_range_between_quotes()` yields `None`.
+ fn escaped_char_ranges(
+ &self,
+ cb: &mut dyn FnMut(TextRange, Result<char, rustc_lexer::unescape::EscapeError>),
+ ) {
+ let token_start = self.syntax().text_range().start();
+ // Range of the quoted contents, re-based so that it indexes into `self.text()`.
+ let contents = match self.text_range_between_quotes() {
+ Some(between_quotes) => between_quotes - token_start,
+ None => return,
+ };
+ // `unescape_literal` reports ranges relative to the slice it is given, so every
+ // reported range is shifted by where that slice begins inside the token.
+ let shift = contents.start();
+
+ unescape_literal(&self.text()[contents], Mode::Str, &mut |piece, unescaped| {
+ let piece_range =
+ TextRange::new(piece.start.try_into().unwrap(), piece.end.try_into().unwrap());
+ cb(piece_range + shift, unescaped);
+ });
+ }
}
impl IsString for ast::String {}
}
}
-#[derive(Debug)]
-pub enum FormatSpecifier {
- Open,
- Close,
- Integer,
- Identifier,
- Colon,
- Fill,
- Align,
- Sign,
- NumberSign,
- Zero,
- DollarSign,
- Dot,
- Asterisk,
- QuestionMark,
-}
-
-pub trait HasFormatSpecifier: AstToken {
- fn char_ranges(
- &self,
- ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>;
-
- fn lex_format_specifier<F>(&self, mut callback: F)
- where
- F: FnMut(TextRange, FormatSpecifier),
- {
- let char_ranges = match self.char_ranges() {
- Some(char_ranges) => char_ranges,
- None => return,
- };
- let mut chars = char_ranges
- .iter()
- .filter_map(|(range, res)| Some((*range, *res.as_ref().ok()?)))
- .peekable();
-
- while let Some((range, first_char)) = chars.next() {
- if let '{' = first_char {
- // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
- if let Some((_, '{')) = chars.peek() {
- // Escaped format specifier, `{{`
- chars.next();
- continue;
- }
-
- callback(range, FormatSpecifier::Open);
-
- // check for integer/identifier
- let (_, int_char) = chars.peek().copied().unwrap_or_default();
- match int_char {
- // integer
- '0'..='9' => read_integer(&mut chars, &mut callback),
- // identifier
- c if c == '_' || c.is_alphabetic() => {
- read_identifier(&mut chars, &mut callback)
- }
- _ => {}
- }
-
- if let Some((_, ':')) = chars.peek() {
- skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
-
- // check for fill/align
- let mut cloned = chars.clone().take(2);
- let (_, first) = cloned.next().unwrap_or_default();
- let (_, second) = cloned.next().unwrap_or_default();
- match second {
- '<' | '^' | '>' => {
- // alignment specifier, first char specifies fillment
- skip_char_and_emit(&mut chars, FormatSpecifier::Fill, &mut callback);
- skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback);
- }
- _ => {
- if let '<' | '^' | '>' = first {
- skip_char_and_emit(
- &mut chars,
- FormatSpecifier::Align,
- &mut callback,
- );
- }
- }
- }
-
- // check for sign
- match chars.peek().copied().unwrap_or_default().1 {
- '+' | '-' => {
- skip_char_and_emit(&mut chars, FormatSpecifier::Sign, &mut callback);
- }
- _ => {}
- }
-
- // check for `#`
- if let Some((_, '#')) = chars.peek() {
- skip_char_and_emit(&mut chars, FormatSpecifier::NumberSign, &mut callback);
- }
-
- // check for `0`
- let mut cloned = chars.clone().take(2);
- let first = cloned.next().map(|next| next.1);
- let second = cloned.next().map(|next| next.1);
-
- if first == Some('0') && second != Some('$') {
- skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
- }
-
- // width
- match chars.peek().copied().unwrap_or_default().1 {
- '0'..='9' => {
- read_integer(&mut chars, &mut callback);
- if let Some((_, '$')) = chars.peek() {
- skip_char_and_emit(
- &mut chars,
- FormatSpecifier::DollarSign,
- &mut callback,
- );
- }
- }
- c if c == '_' || c.is_alphabetic() => {
- read_identifier(&mut chars, &mut callback);
-
- if chars.peek().map(|&(_, c)| c) == Some('?') {
- skip_char_and_emit(
- &mut chars,
- FormatSpecifier::QuestionMark,
- &mut callback,
- );
- }
-
- // can be either width (indicated by dollar sign, or type in which case
- // the next sign has to be `}`)
- let next = chars.peek().map(|&(_, c)| c);
-
- match next {
- Some('$') => skip_char_and_emit(
- &mut chars,
- FormatSpecifier::DollarSign,
- &mut callback,
- ),
- Some('}') => {
- skip_char_and_emit(
- &mut chars,
- FormatSpecifier::Close,
- &mut callback,
- );
- continue;
- }
- _ => continue,
- };
- }
- _ => {}
- }
-
- // precision
- if let Some((_, '.')) = chars.peek() {
- skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
-
- match chars.peek().copied().unwrap_or_default().1 {
- '*' => {
- skip_char_and_emit(
- &mut chars,
- FormatSpecifier::Asterisk,
- &mut callback,
- );
- }
- '0'..='9' => {
- read_integer(&mut chars, &mut callback);
- if let Some((_, '$')) = chars.peek() {
- skip_char_and_emit(
- &mut chars,
- FormatSpecifier::DollarSign,
- &mut callback,
- );
- }
- }
- c if c == '_' || c.is_alphabetic() => {
- read_identifier(&mut chars, &mut callback);
- if chars.peek().map(|&(_, c)| c) != Some('$') {
- continue;
- }
- skip_char_and_emit(
- &mut chars,
- FormatSpecifier::DollarSign,
- &mut callback,
- );
- }
- _ => {
- continue;
- }
- }
- }
-
- // type
- match chars.peek().copied().unwrap_or_default().1 {
- '?' => {
- skip_char_and_emit(
- &mut chars,
- FormatSpecifier::QuestionMark,
- &mut callback,
- );
- }
- c if c == '_' || c.is_alphabetic() => {
- read_identifier(&mut chars, &mut callback);
-
- if chars.peek().map(|&(_, c)| c) == Some('?') {
- skip_char_and_emit(
- &mut chars,
- FormatSpecifier::QuestionMark,
- &mut callback,
- );
- }
- }
- _ => {}
- }
- }
-
- if let Some((_, '}')) = chars.peek() {
- skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
- }
- continue;
- }
- }
-
- fn skip_char_and_emit<I, F>(
- chars: &mut std::iter::Peekable<I>,
- emit: FormatSpecifier,
- callback: &mut F,
- ) where
- I: Iterator<Item = (TextRange, char)>,
- F: FnMut(TextRange, FormatSpecifier),
- {
- let (range, _) = chars.next().unwrap();
- callback(range, emit);
- }
-
- fn read_integer<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
- where
- I: Iterator<Item = (TextRange, char)>,
- F: FnMut(TextRange, FormatSpecifier),
- {
- let (mut range, c) = chars.next().unwrap();
- assert!(c.is_ascii_digit());
- while let Some(&(r, next_char)) = chars.peek() {
- if next_char.is_ascii_digit() {
- chars.next();
- range = range.cover(r);
- } else {
- break;
- }
- }
- callback(range, FormatSpecifier::Integer);
- }
-
- fn read_identifier<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
- where
- I: Iterator<Item = (TextRange, char)>,
- F: FnMut(TextRange, FormatSpecifier),
- {
- let (mut range, c) = chars.next().unwrap();
- assert!(c.is_alphabetic() || c == '_');
- while let Some(&(r, next_char)) = chars.peek() {
- if next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
- chars.next();
- range = range.cover(r);
- } else {
- break;
- }
- }
- callback(range, FormatSpecifier::Identifier);
- }
- }
-}
-
-impl HasFormatSpecifier for ast::String {
- fn char_ranges(
- &self,
- ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
- let text = self.text();
- let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
- let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();
-
- let mut res = Vec::with_capacity(text.len());
- unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
- res.push((
- TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap())
- + offset,
- unescaped_char,
- ));
- });
-
- Some(res)
- }
-}
-
impl ast::IntNumber {
pub fn radix(&self) -> Radix {
match self.text().get(..2).unwrap_or_default() {