1 //! This file contains code for parsing SSR rules, which look something like `foo($a) ==>> bar($b)`.
2 //! We first split everything before and after the separator `==>>`. Next, both the search pattern
3 //! and the replacement template get tokenized by the Rust tokenizer. Tokens are then searched for
4 //! placeholders, which start with `$`. For replacement templates, this is the final form. For
5 //! search patterns, we go further and parse the pattern as each kind of thing that we can match.
6 //! e.g. expressions, type references etc.
7 use rustc_hash::{FxHashMap, FxHashSet};
8 use std::{fmt::Display, str::FromStr};
9 use syntax::{SmolStr, SyntaxKind, SyntaxNode, T};
11 use crate::errors::bail;
12 use crate::{fragments, SsrError, SsrPattern, SsrRule};
/// One successful parse of an SSR rule: the search pattern parsed as a particular kind of
/// syntax, together with a parse of the replacement template when the rule has one.
15 pub(crate) struct ParsedRule {
/// Placeholders of the search pattern, keyed by the stand-in name that takes each
/// placeholder's place in the Rust code handed to the parser.
16 pub(crate) placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
/// Syntax tree obtained by parsing the search pattern.
17 pub(crate) pattern: SyntaxNode,
/// Syntax tree obtained by parsing the replacement template; `None` for search-only patterns.
18 pub(crate) template: Option<SyntaxNode>,
/// A search pattern or replacement template in tokenized form, before it is parsed as Rust code.
22 pub(crate) struct RawPattern {
/// Plain tokens and placeholders in the order `parse_pattern` produced them.
23 tokens: Vec<PatternElement>,
26 // Part of a search or replace pattern.
// `parse_pattern` emits a `Placeholder` element for every `$...` it finds and a `Token` element
// for every other lexed token.
27 #[derive(Clone, Debug, PartialEq, Eq)]
28 pub(crate) enum PatternElement {
/// A placeholder, as produced by `parse_placeholder`.
30 Placeholder(Placeholder),
/// A placeholder found in a search pattern or replacement template.
33 #[derive(Clone, Debug, PartialEq, Eq)]
34 pub(crate) struct Placeholder {
35 /// The name of this placeholder. e.g. for "$a", this would be "a"
36 pub(crate) ident: Var,
37 /// A unique name used in place of this placeholder when we parse the pattern as Rust code.
38 stand_in_name: String,
/// Constraints collected by `parse_placeholder`; stays empty when none were written.
/// `validate_rule` rejects replacement templates whose placeholders carry any.
39 pub(crate) constraints: Vec<Constraint>,
42 /// Represents a `$var` in an SSR query.
///
/// The wrapped string is the name as built by `Placeholder::new` from the identifier token, i.e.
/// without the leading `$`; the `Display` impl prints it with the `$` in front.
43 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
44 pub(crate) struct Var(pub(crate) String);
/// A restriction attached to a placeholder, produced by `parse_constraint`.
///
/// `parse_constraint` builds values as `Constraint::Kind(NodeKind)` and as `Constraint::Not(..)`
/// wrapping a boxed nested constraint.
46 #[derive(Clone, Debug, PartialEq, Eq)]
47 pub(crate) enum Constraint {
/// The node kinds that a `Constraint::Kind` can name; `NodeKind::from` converts the textual
/// name (e.g. "literal") into a value of this type.
52 #[derive(Clone, Debug, PartialEq, Eq)]
53 pub(crate) enum NodeKind {
/// A single lexed token of a pattern, built by `tokenize` from the lexer's kind and text.
57 #[derive(Debug, Clone, PartialEq, Eq)]
58 pub(crate) struct Token {
/// Text of the token as reported by the lexer.
60 pub(crate) text: SmolStr,
// Replacement template in tokenized form; callers pass `None` when there is only a search pattern.
66 template: Option<&RawPattern>,
67 ) -> Result<Vec<ParsedRule>, SsrError> {
// Turn both token sequences back into source text, with stand-in names in place of placeholders.
68 let raw_pattern = pattern.as_rust_code();
69 let raw_template = template.map(|t| t.as_rust_code());
// Re-borrow the owned template text as `Option<&str>` so it can be mapped repeatedly below.
70 let raw_template = raw_template.as_deref();
71 let mut builder = RuleBuilder {
72 placeholders_by_stand_in: pattern.placeholders_by_stand_in(),
// The statement parse of the template is used twice below, hence the `clone` at its first use.
76 let raw_template_stmt = raw_template.map(fragments::stmt);
// Expression pattern: paired with the template's expression parse when that parse succeeded.
77 if let raw_template_expr @ Some(Ok(_)) = raw_template.map(fragments::expr) {
78 builder.try_add(fragments::expr(&raw_pattern), raw_template_expr);
// NOTE(review): the line between these two calls is not visible here; presumably an `else`,
// making the statement parse of the template the fallback for an expression pattern.
80 builder.try_add(fragments::expr(&raw_pattern), raw_template_stmt.clone());
// The remaining fragment kinds pair the pattern with the same kind of parse of the template.
82 builder.try_add(fragments::ty(&raw_pattern), raw_template.map(fragments::ty));
83 builder.try_add(fragments::item(&raw_pattern), raw_template.map(fragments::item));
84 builder.try_add(fragments::pat(&raw_pattern), raw_template.map(fragments::pat));
85 builder.try_add(fragments::stmt(&raw_pattern), raw_template_stmt);
// Placeholder table of the search pattern; `try_add` clones it into every rule it records.
91 placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
// Rules recorded so far by `try_add`; `build` filters and returns them.
92 rules: Vec<ParsedRule>,
// Result of parsing the search pattern as one kind of fragment.
98 pattern: Result<SyntaxNode, ()>,
// Result of parsing the template, or `None` when the rule has no template.
99 template: Option<Result<SyntaxNode, ()>>,
// The arms visible here record a rule when the pattern parsed and the template either parsed
// too or is absent altogether.
101 match (pattern, template) {
102 (Ok(pattern), Some(Ok(template))) => self.rules.push(ParsedRule {
103 placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
105 template: Some(template),
// Search-only case: there is no template to pair the pattern with.
107 (Ok(pattern), None) => self.rules.push(ParsedRule {
108 placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
116 fn build(mut self) -> Result<Vec<ParsedRule>, SsrError> {
117 if self.rules.is_empty() {
118 bail!("Not a valid Rust expression, type, item, path or pattern");
120 // If any rules contain paths, then we reject any rules that don't contain paths. Allowing a
121 // mix leads to strange semantics, since the path-based rules only match things where the
122 // path refers to semantically the same thing, whereas the non-path-based rules could match
123 // anything. Specifically, if we have a rule like `foo ==>> bar` we only want to match the
124 // `foo` that is in the current scope, not any `foo`. However "foo" can be parsed as a
125 // pattern (IDENT_PAT -> NAME -> IDENT). Allowing such a rule through would result in
126 // renaming everything called `foo` to `bar`. It'd also be slow, since without a path, we'd
127 // have to use the slow-scan search mechanism.
128 if self.rules.iter().any(|rule| contains_path(&rule.pattern)) {
129 let old_len = self.rules.len();
130 self.rules.retain(|rule| contains_path(&rule.pattern));
131 if self.rules.len() < old_len {
132 cov_mark::hit!(pattern_is_a_single_segment_path);
139 /// Returns whether there are any paths in `node`.
140 fn contains_path(node: &SyntaxNode) -> bool {
141 node.kind() == SyntaxKind::PATH
142 || node.descendants().any(|node| node.kind() == SyntaxKind::PATH)
// Parses a complete rule written as `pattern ==>> template`.
145 impl FromStr for SsrRule {
/// Splits `query` at the `==>>` delimiter, parses both halves and validates the result.
///
/// Returns an error if the delimiter is absent or occurs more than once, if either half fails
/// to parse, or if `validate_rule` rejects the rule.
148 fn from_str(query: &str) -> Result<SsrRule, SsrError> {
149 let mut it = query.split("==>>");
// `split` yields at least one piece even for an empty string, so this `expect` cannot fire.
150 let pattern = it.next().expect("at least empty string").trim();
153 .ok_or_else(|| SsrError("Cannot find delimiter `==>>`".into()))?
// A further piece means the query contained a second `==>>`.
156 if it.next().is_some() {
157 return Err(SsrError("More than one delimiter found".into()));
// Tokenize each half, then parse the pair as every kind of fragment that fits.
159 let raw_pattern = pattern.parse()?;
160 let raw_template = template.parse()?;
161 let parsed_rules = ParsedRule::new(&raw_pattern, Some(&raw_template))?;
162 let rule = SsrRule { pattern: raw_pattern, template: raw_template, parsed_rules };
// Checks that need both halves at once, e.g. placeholders the pattern never defined.
163 validate_rule(&rule)?;
168 impl FromStr for RawPattern {
171 fn from_str(pattern_str: &str) -> Result<RawPattern, SsrError> {
172 Ok(RawPattern { tokens: parse_pattern(pattern_str)? })
177 /// Returns this search pattern as Rust source code that we can feed to the Rust parser.
178 fn as_rust_code(&self) -> String {
179 let mut res = String::new();
180 for t in &self.tokens {
181 res.push_str(match t {
182 PatternElement::Token(token) => token.text.as_str(),
183 PatternElement::Placeholder(placeholder) => placeholder.stand_in_name.as_str(),
189 pub(crate) fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> {
190 let mut res = FxHashMap::default();
191 for t in &self.tokens {
192 if let PatternElement::Placeholder(placeholder) = t {
193 res.insert(SmolStr::new(placeholder.stand_in_name.clone()), placeholder.clone());
200 impl FromStr for SsrPattern {
203 fn from_str(pattern_str: &str) -> Result<SsrPattern, SsrError> {
204 let raw_pattern = pattern_str.parse()?;
205 let parsed_rules = ParsedRule::new(&raw_pattern, None)?;
206 Ok(SsrPattern { parsed_rules })
210 /// Returns `pattern_str`, parsed as a search or replace pattern. If `remove_whitespace` is true,
211 /// then any whitespace tokens will be removed, which we do for the search pattern, but not for the
213 fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> {
214 let mut res = Vec::new();
215 let mut placeholder_names = FxHashSet::default();
216 let mut tokens = tokenize(pattern_str)?.into_iter();
217 while let Some(token) = tokens.next() {
218 if token.kind == T![$] {
219 let placeholder = parse_placeholder(&mut tokens)?;
220 if !placeholder_names.insert(placeholder.ident.clone()) {
221 bail!("Placeholder `{}` repeats more than once", placeholder.ident);
223 res.push(PatternElement::Placeholder(placeholder));
225 res.push(PatternElement::Token(token));
231 /// Checks for errors in a rule. e.g. the replace pattern referencing placeholders that the search
232 /// pattern didn't define.
233 fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> {
234 let mut defined_placeholders = FxHashSet::default();
235 for p in &rule.pattern.tokens {
236 if let PatternElement::Placeholder(placeholder) = p {
237 defined_placeholders.insert(&placeholder.ident);
240 let mut undefined = Vec::new();
241 for p in &rule.template.tokens {
242 if let PatternElement::Placeholder(placeholder) = p {
243 if !defined_placeholders.contains(&placeholder.ident) {
244 undefined.push(placeholder.ident.to_string());
246 if !placeholder.constraints.is_empty() {
247 bail!("Replacement placeholders cannot have constraints");
251 if !undefined.is_empty() {
252 bail!("Replacement contains undefined placeholders: {}", undefined.join(", "));
257 fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> {
258 let lexed = parser::LexedStr::new(source);
259 if let Some((_, first_error)) = lexed.errors().next() {
260 bail!("Failed to parse pattern: {}", first_error);
262 let mut tokens: Vec<Token> = Vec::new();
263 for i in 0..lexed.len() {
264 tokens.push(Token { kind: lexed.kind(i), text: lexed.text(i).into() });
/// Parses the placeholder that follows a `$` token, consuming its tokens from `tokens`.
///
/// Per the error message below, the accepted forms are `$name` and `${name:constraints}`.
/// Returns an error if the placeholder is malformed or ends up without a name.
269 fn parse_placeholder(tokens: &mut std::vec::IntoIter<Token>) -> Result<Placeholder, SsrError> {
271 let mut constraints = Vec::new();
272 if let Some(token) = tokens.next() {
// An identifier straight after the `$` is the placeholder's name.
274 SyntaxKind::IDENT => {
275 name = Some(token.text);
// NOTE(review): the lines that open this branch are not visible here. Going by the error
// messages, what follows handles the braced form: an optional name, then constraints, then '}'.
279 tokens.next().ok_or_else(|| SsrError::new("Unexpected end of placeholder"))?;
280 if token.kind == SyntaxKind::IDENT {
281 name = Some(token.text);
// Running out of tokens here means the closing brace was never found.
286 .ok_or_else(|| SsrError::new("Placeholder is missing closing brace '}'"))?;
289 constraints.push(parse_constraint(tokens)?);
292 _ => bail!("Unexpected token while parsing placeholder: '{}'", token.text),
297 bail!("Placeholders should either be $name or ${{name:constraints}}");
// `name` is still `None` if no identifier was seen above.
301 let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?;
302 Ok(Placeholder::new(name, constraints))
/// Parses a single constraint from `tokens`, calling itself for a nested constraint.
///
/// The text of the first token selects the constraint type; its argument follows in parentheses.
/// Returns an error if the tokens run out, an expected parenthesis or identifier is missing, or
/// the constraint type is not recognised.
305 fn parse_constraint(tokens: &mut std::vec::IntoIter<Token>) -> Result<Constraint, SsrError> {
306 let constraint_type = tokens
308 .ok_or_else(|| SsrError::new("Found end of placeholder while looking for a constraint"))?
311 match constraint_type.as_str() {
// Kind constraint: `(` identifier `)`, where the identifier names a `NodeKind`.
313 expect_token(tokens, "(")?;
314 let t = tokens.next().ok_or_else(|| {
315 SsrError::new("Unexpected end of constraint while looking for kind")
317 if t.kind != SyntaxKind::IDENT {
318 bail!("Expected ident, found {:?} while parsing kind constraint", t.kind);
320 expect_token(tokens, ")")?;
321 Ok(Constraint::Kind(NodeKind::from(&t.text)?))
// Wrapping constraint: `(` nested constraint `)`, stored boxed inside `Constraint::Not`.
324 expect_token(tokens, "(")?;
325 let sub = parse_constraint(tokens)?;
326 expect_token(tokens, ")")?;
327 Ok(Constraint::Not(Box::new(sub)))
// Any other constraint type is rejected, naming the offending text.
329 x => bail!("Unsupported constraint type '{}'", x),
333 fn expect_token(tokens: &mut std::vec::IntoIter<Token>, expected: &str) -> Result<(), SsrError> {
334 if let Some(t) = tokens.next() {
335 if t.text == expected {
338 bail!("Expected {} found {}", expected, t.text);
340 bail!("Expected {} found end of stream", expected);
344 fn from(name: &SmolStr) -> Result<NodeKind, SsrError> {
345 Ok(match name.as_str() {
346 "literal" => NodeKind::Literal,
347 _ => bail!("Unknown node kind '{}'", name),
353 fn new(name: SmolStr, constraints: Vec<Constraint>) -> Self {
355 stand_in_name: format!("__placeholder_{}", name),
357 ident: Var(name.to_string()),
362 impl Display for Var {
363 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
364 write!(f, "${}", self.0)
373 fn parser_happy_case() {
374 fn token(kind: SyntaxKind, text: &str) -> PatternElement {
375 PatternElement::Token(Token { kind, text: SmolStr::new(text) })
377 fn placeholder(name: &str) -> PatternElement {
378 PatternElement::Placeholder(Placeholder::new(SmolStr::new(name), Vec::new()))
380 let result: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap();
382 result.pattern.tokens,
384 token(SyntaxKind::IDENT, "foo"),
388 token(SyntaxKind::WHITESPACE, " "),
394 result.template.tokens,
396 token(SyntaxKind::IDENT, "bar"),
400 token(SyntaxKind::WHITESPACE, " "),