]> git.lizzy.rs Git - rust.git/blob - src/tools/rust-analyzer/crates/ide-db/src/syntax_helpers/format_string_exprs.rs
Auto merge of #102622 - camsteffen:move-layout, r=fee1-dead
[rust.git] / src / tools / rust-analyzer / crates / ide-db / src / syntax_helpers / format_string_exprs.rs
1 //! Tools to work with expressions present in format string literals for the `format_args!` family of macros.
2 //! Primarily meant for assists and completions.
3
/// Enum representing extracted format string args.
/// Can either be extracted expressions (which includes identifiers),
/// or placeholders `{}`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Arg {
    /// An empty argument slot (`{}`) that carries no inline expression.
    Placeholder,
    /// Argument text that was classified as a plain identifier (e.g. `_ident`, `r#raw_ident`).
    Ident(String),
    /// Argument text of any other expression (e.g. `expr.obj`, `2 + 2`).
    Expr(String),
}
13
14 /**
15  Add placeholders like `$1` and `$2` in place of [`Arg::Placeholder`],
16  and unwraps the [`Arg::Ident`] and [`Arg::Expr`] enums.
17  ```rust
18  # use ide_db::syntax_helpers::format_string_exprs::*;
19  assert_eq!(with_placeholders(vec![Arg::Ident("ident".to_owned()), Arg::Placeholder, Arg::Expr("expr + 2".to_owned())]), vec!["ident".to_owned(), "$1".to_owned(), "expr + 2".to_owned()])
20  ```
21 */
22
23 pub fn with_placeholders(args: Vec<Arg>) -> Vec<String> {
24     let mut placeholder_id = 1;
25     args.into_iter()
26         .map(move |a| match a {
27             Arg::Expr(s) | Arg::Ident(s) => s,
28             Arg::Placeholder => {
29                 let s = format!("${placeholder_id}");
30                 placeholder_id += 1;
31                 s
32             }
33         })
34         .collect()
35 }
36
37 /**
38  Parser for a format-like string. It is more allowing in terms of string contents,
39  as we expect variable placeholders to be filled with expressions.
40
41  Built for completions and assists, and escapes `\` and `$` in output.
42  (See the comments on `get_receiver_text()` for detail.)
43  Splits a format string that may contain expressions
44  like
45  ```rust
46  assert_eq!(parse("{ident} {} {expr + 42} ").unwrap(), ("{} {} {}", vec![Arg::Ident("ident"), Arg::Placeholder, Arg::Expr("expr + 42")]));
47  ```
48 */
49 pub fn parse_format_exprs(input: &str) -> Result<(String, Vec<Arg>), ()> {
50     #[derive(Debug, Clone, Copy, PartialEq)]
51     enum State {
52         NotArg,
53         MaybeArg,
54         Expr,
55         Ident,
56         MaybeIncorrect,
57         FormatOpts,
58     }
59
60     let mut state = State::NotArg;
61     let mut current_expr = String::new();
62     let mut extracted_expressions = Vec::new();
63     let mut output = String::new();
64
65     // Count of open braces inside of an expression.
66     // We assume that user knows what they're doing, thus we treat it like a correct pattern, e.g.
67     // "{MyStruct { val_a: 0, val_b: 1 }}".
68     let mut inexpr_open_count = 0;
69
70     let mut chars = input.chars().peekable();
71     while let Some(chr) = chars.next() {
72         match (state, chr) {
73             (State::NotArg, '{') => {
74                 output.push(chr);
75                 state = State::MaybeArg;
76             }
77             (State::NotArg, '}') => {
78                 output.push(chr);
79                 state = State::MaybeIncorrect;
80             }
81             (State::NotArg, _) => {
82                 if matches!(chr, '\\' | '$') {
83                     output.push('\\');
84                 }
85                 output.push(chr);
86             }
87             (State::MaybeIncorrect, '}') => {
88                 // It's okay, we met "}}".
89                 output.push(chr);
90                 state = State::NotArg;
91             }
92             (State::MaybeIncorrect, _) => {
93                 // Error in the string.
94                 return Err(());
95             }
96             // Escaped braces `{{`
97             (State::MaybeArg, '{') => {
98                 output.push(chr);
99                 state = State::NotArg;
100             }
101             (State::MaybeArg, '}') => {
102                 // This is an empty sequence '{}'.
103                 output.push(chr);
104                 extracted_expressions.push(Arg::Placeholder);
105                 state = State::NotArg;
106             }
107             (State::MaybeArg, _) => {
108                 if matches!(chr, '\\' | '$') {
109                     current_expr.push('\\');
110                 }
111                 current_expr.push(chr);
112
113                 // While Rust uses the unicode sets of XID_start and XID_continue for Identifiers
114                 // this is probably the best we can do to avoid a false positive
115                 if chr.is_alphabetic() || chr == '_' {
116                     state = State::Ident;
117                 } else {
118                     state = State::Expr;
119                 }
120             }
121             (State::Ident | State::Expr, '}') => {
122                 if inexpr_open_count == 0 {
123                     output.push(chr);
124
125                     if matches!(state, State::Expr) {
126                         extracted_expressions.push(Arg::Expr(current_expr.trim().into()));
127                     } else {
128                         extracted_expressions.push(Arg::Ident(current_expr.trim().into()));
129                     }
130
131                     current_expr = String::new();
132                     state = State::NotArg;
133                 } else {
134                     // We're closing one brace met before inside of the expression.
135                     current_expr.push(chr);
136                     inexpr_open_count -= 1;
137                 }
138             }
139             (State::Ident | State::Expr, ':') if matches!(chars.peek(), Some(':')) => {
140                 // path separator
141                 state = State::Expr;
142                 current_expr.push_str("::");
143                 chars.next();
144             }
145             (State::Ident | State::Expr, ':') => {
146                 if inexpr_open_count == 0 {
147                     // We're outside of braces, thus assume that it's a specifier, like "{Some(value):?}"
148                     output.push(chr);
149
150                     if matches!(state, State::Expr) {
151                         extracted_expressions.push(Arg::Expr(current_expr.trim().into()));
152                     } else {
153                         extracted_expressions.push(Arg::Ident(current_expr.trim().into()));
154                     }
155
156                     current_expr = String::new();
157                     state = State::FormatOpts;
158                 } else {
159                     // We're inside of braced expression, assume that it's a struct field name/value delimiter.
160                     current_expr.push(chr);
161                 }
162             }
163             (State::Ident | State::Expr, '{') => {
164                 state = State::Expr;
165                 current_expr.push(chr);
166                 inexpr_open_count += 1;
167             }
168             (State::Ident | State::Expr, _) => {
169                 if !(chr.is_alphanumeric() || chr == '_' || chr == '#') {
170                     state = State::Expr;
171                 }
172
173                 if matches!(chr, '\\' | '$') {
174                     current_expr.push('\\');
175                 }
176                 current_expr.push(chr);
177             }
178             (State::FormatOpts, '}') => {
179                 output.push(chr);
180                 state = State::NotArg;
181             }
182             (State::FormatOpts, _) => {
183                 if matches!(chr, '\\' | '$') {
184                     output.push('\\');
185                 }
186                 output.push(chr);
187             }
188         }
189     }
190
191     if state != State::NotArg {
192         return Err(());
193     }
194
195     Ok((output, extracted_expressions))
196 }
197
#[cfg(test)]
mod tests {
    use super::*;
    use expect_test::{expect, Expect};

    /// Parses `input` and compares a compact rendering of the result against `expect`.
    ///
    /// The rendering is `-` when parsing fails, the output string alone when no arguments
    /// were extracted, and `output; arg, arg, ...` otherwise (placeholders shown as `$N`).
    fn check(input: &str, expect: &Expect) {
        let outcome_repr = match parse_format_exprs(input) {
            Err(()) => "-".to_string(),
            Ok((output, exprs)) if exprs.is_empty() => output,
            Ok((output, exprs)) => format!("{}; {}", output, with_placeholders(exprs).join(", ")),
        };

        expect.assert_eq(&outcome_repr);
    }

    /// Table-driven test of the parser: each entry pairs an input with its expected rendering.
    #[test]
    fn format_str_parser() {
        let test_vector = &[
            ("no expressions", expect![["no expressions"]]),
            (r"no expressions with \$0$1", expect![r"no expressions with \\\$0\$1"]),
            ("{expr} is {2 + 2}", expect![["{} is {}; expr, 2 + 2"]]),
            ("{expr:?}", expect![["{:?}; expr"]]),
            ("{expr:1$}", expect![[r"{:1\$}; expr"]]),
            ("{$0}", expect![[r"{}; \$0"]]),
            ("{malformed", expect![["-"]]),
            ("malformed}", expect![["-"]]),
            ("{{correct", expect![["{{correct"]]),
            ("correct}}", expect![["correct}}"]]),
            ("{correct}}}", expect![["{}}}; correct"]]),
            ("{correct}}}}}", expect![["{}}}}}; correct"]]),
            ("{incorrect}}", expect![["-"]]),
            ("placeholders {} {}", expect![["placeholders {} {}; $1, $2"]]),
            ("mixed {} {2 + 2} {}", expect![["mixed {} {} {}; $1, 2 + 2, $2"]]),
            (
                "{SomeStruct { val_a: 0, val_b: 1 }}",
                expect![["{}; SomeStruct { val_a: 0, val_b: 1 }"]],
            ),
            ("{expr:?} is {2.32f64:.5}", expect![["{:?} is {:.5}; expr, 2.32f64"]]),
            (
                "{SomeStruct { val_a: 0, val_b: 1 }:?}",
                expect![["{:?}; SomeStruct { val_a: 0, val_b: 1 }"]],
            ),
            ("{     2 + 2        }", expect![["{}; 2 + 2"]]),
            ("{strsim::jaro_winkle(a)}", expect![["{}; strsim::jaro_winkle(a)"]]),
            ("{foo::bar::baz()}", expect![["{}; foo::bar::baz()"]]),
            ("{foo::bar():?}", expect![["{:?}; foo::bar()"]]),
        ];

        test_vector.iter().for_each(|(input, expected)| check(input, expected));
    }

    /// Checks how each extracted argument is classified (identifier, expression, placeholder).
    #[test]
    fn arg_type() {
        let (_, args) =
            parse_format_exprs("{_ident} {r#raw_ident} {expr.obj} {name {thing: 42} } {}")
                .unwrap();

        let expected = vec![
            Arg::Ident("_ident".to_owned()),
            Arg::Ident("r#raw_ident".to_owned()),
            Arg::Expr("expr.obj".to_owned()),
            Arg::Expr("name {thing: 42}".to_owned()),
            Arg::Placeholder,
        ];

        assert_eq!(args, expected);
    }
}
267 }