git.lizzy.rs Git - rust.git/commitdiff
Move license template parsing into config phase
author: David Lukes <dafydd.lukes@gmail.com>
Mon, 19 Feb 2018 16:26:29 +0000 (17:26 +0100)
committer: David Lukes <dafydd.lukes@gmail.com>
Mon, 5 Mar 2018 12:13:55 +0000 (13:13 +0100)
src/config/config_type.rs
src/config/mod.rs
src/lib.rs

index 950225679a5403dae593ae725909a81b1a2a9ea7..02e9b2d107836532d1643e5ec9feb53fad97581f 100644 (file)
@@ -78,6 +78,9 @@ macro_rules! create_config {
 
         #[derive(Clone)]
         pub struct Config {
+            // if a license_template_path has been specified, successfully read, parsed and compiled
+            // into a regex, it will be stored here
+            pub license_template: Option<Regex>,
             // For each config item, we store a bool indicating whether it has
             // been accessed and the value, and a bool whether the option was
             // manually initialised, or taken from the default,
@@ -118,8 +121,10 @@ impl<'a> ConfigSetter<'a> {
             $(
             pub fn $i(&mut self, value: $ty) {
                 (self.0).$i.2 = value;
-                if stringify!($i) == "use_small_heuristics" {
-                    self.0.set_heuristics();
+                match stringify!($i) {
+                    "use_small_heuristics" => self.0.set_heuristics(),
+                    "license_template_path" => self.0.set_license_template(),
+                    &_ => (),
                 }
             }
             )+
@@ -189,6 +194,7 @@ fn fill_from_parsed_config(mut self, parsed: PartialConfig) -> Config {
                 }
             )+
                 self.set_heuristics();
+                self.set_license_template();
                 self
             }
 
@@ -276,8 +282,10 @@ pub fn override_value(&mut self, key: &str, val: &str)
                     _ => panic!("Unknown config key in override: {}", key)
                 }
 
-                if key == "use_small_heuristics" {
-                    self.set_heuristics();
+                match key {
+                    "use_small_heuristics" => self.set_heuristics(),
+                    "license_template_path" => self.set_license_template(),
+                    &_ => (),
                 }
             }
 
@@ -382,12 +390,50 @@ fn set_heuristics(&mut self) {
                     self.set().width_heuristics(WidthHeuristics::null());
                 }
             }
+
+            fn set_license_template(&mut self) {
+                let license_template_path = self.license_template_path();
+                let mut license_template_file = match File::open(&license_template_path) {
+                    Ok(file) => file,
+                    Err(e) => {
+                        eprintln!("Warning: unable to open license template file {:?}: {}",
+                                  license_template_path, e);
+                        return;
+                    }
+                };
+                let mut license_template_str = String::new();
+                match license_template_file.read_to_string(&mut license_template_str) {
+                    Ok(_) => (),
+                    Err(e) => {
+                        eprintln!("Warning: unable to read from license template file {:?}: {}",
+                                  license_template_path, e);
+                        return;
+                    }
+                }
+                let license_template_parsed = match parse_license_template(&license_template_str) {
+                    Ok(string) => string,
+                    Err(e) => {
+                        eprintln!("Warning: unable to parse license template file {:?}: {}",
+                                  license_template_path, e);
+                        return;
+                    }
+                };
+                self.license_template = match Regex::new(&license_template_parsed) {
+                    Ok(re) => Some(re),
+                    Err(e) => {
+                        eprintln!("Warning: regex syntax error in placeholder, unable to compile \
+                                   license template from file {:?}: {}", license_template_path, e);
+                        return;
+                    }
+                }
+            }
         }
 
         // Template for the default configuration
         impl Default for Config {
             fn default() -> Config {
                 Config {
+                    license_template: None,
                     $(
                         $i: (Cell::new(false), false, $def, $stb),
                     )+
index c16d5bb679958d49557f0f464404c3dc8c8b9d85..53078716414fa3215f0c5e21eb6927a507f293db 100644 (file)
@@ -15,6 +15,8 @@
 use std::io::{Error, ErrorKind, Read};
 use std::path::{Path, PathBuf};
 
+use regex::Regex;
+
 #[macro_use]
 mod config_type;
 #[macro_use]
@@ -50,7 +52,7 @@
     comment_width: usize, 80, false,
         "Maximum length of comments. No effect unless wrap_comments = true";
     normalize_comments: bool, false, true, "Convert /* */ comments to // comments where possible";
-    license_template: String, String::default(), false, "Check for license";
+    license_template_path: String, String::default(), false, "Beginning of file must match license template";
 
     // Single line expressions and items.
     empty_item_single_line: bool, true, false,
@@ -172,9 +174,145 @@ pub fn get_toml_path(dir: &Path) -> Result<Option<PathBuf>, Error> {
     Ok(None)
 }
 
+/// Convert the license template into a string which can be turned into a regex.
+///
+/// The license template could use regex syntax directly, but that would require a lot of manual
+/// escaping, which is inconvenient. It is therefore literal by default, with optional regex
+/// subparts delimited by `{` and `}`. Additionally:
+///
+/// - to insert literal `{`, `}` or `\`, escape it with `\`
+/// - an empty regex placeholder (`{}`) is shorthand for `{.*?}`
+///
+/// This function parses this input format and builds a properly escaped *string* representation of
+/// the equivalent regular expression. It **does not** however guarantee that the returned string is
+/// a syntactically valid regular expression.
+///
+/// # Examples
+///
+/// ```
+/// assert_eq!(
+///     rustfmt_config::parse_license_template(
+///         r"
+/// // Copyright {\d+} The \} Rust \\ Project \{ Developers. See the {([A-Z]+)}
+/// // file at the top-level directory of this distribution and at
+/// // {}.
+/// //
+/// // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+/// // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+/// // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+/// // option. This file may not be copied, modified, or distributed
+/// // except according to those terms.
+/// "
+///     ).unwrap(),
+///     r"^
+/// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+)
+/// // file at the top\-level directory of this distribution and at
+/// // .*?\.
+/// //
+/// // Licensed under the Apache License, Version 2\.0 <LICENSE\-APACHE or
+/// // http://www\.apache\.org/licenses/LICENSE\-2\.0> or the MIT license
+/// // <LICENSE\-MIT or http://opensource\.org/licenses/MIT>, at your
+/// // option\. This file may not be copied, modified, or distributed
+/// // except according to those terms\.
+/// "
+/// );
+/// ```
+pub fn parse_license_template(template: &str) -> Result<String, String> {
+    // the template is parsed using a state machine
+    enum State {
+        Lit,
+        LitEsc,
+        // the u32 keeps track of brace nesting
+        Re(u32),
+        ReEsc(u32),
+    }
+
+    let mut parsed = String::from("^");
+    let mut buffer = String::new();
+    let mut state = State::Lit;
+    let mut linum = 1;
+    // keeps track of last line on which a regex placeholder was started
+    let mut open_brace_line = 0;
+    for chr in template.chars() {
+        if chr == '\n' {
+            linum += 1;
+        }
+        state = match state {
+            State::Lit => match chr {
+                '{' => {
+                    parsed.push_str(&regex::escape(&buffer));
+                    buffer.clear();
+                    open_brace_line = linum;
+                    State::Re(1)
+                }
+                '}' => return Err(format!("escape or balance closing brace on l. {}", linum)),
+                '\\' => State::LitEsc,
+                _ => {
+                    buffer.push(chr);
+                    State::Lit
+                }
+            },
+            State::LitEsc => {
+                buffer.push(chr);
+                State::Lit
+            }
+            State::Re(brace_nesting) => {
+                match chr {
+                    '{' => {
+                        buffer.push(chr);
+                        State::Re(brace_nesting + 1)
+                    }
+                    '}' => {
+                        match brace_nesting {
+                            1 => {
+                                // default regex for empty placeholder {}
+                                if buffer.is_empty() {
+                                    buffer = ".*?".to_string();
+                                }
+                                parsed.push_str(&buffer);
+                                buffer.clear();
+                                State::Lit
+                            }
+                            _ => {
+                                buffer.push(chr);
+                                State::Re(brace_nesting - 1)
+                            }
+                        }
+                    }
+                    '\\' => {
+                        buffer.push(chr);
+                        State::ReEsc(brace_nesting)
+                    }
+                    _ => {
+                        buffer.push(chr);
+                        State::Re(brace_nesting)
+                    }
+                }
+            }
+            State::ReEsc(brace_nesting) => {
+                buffer.push(chr);
+                State::Re(brace_nesting)
+            }
+        }
+    }
+    match state {
+        State::Re(_) | State::ReEsc(_) => {
+            return Err(format!(
+                "escape or balance opening brace on l. {}",
+                open_brace_line
+            ));
+        }
+        State::LitEsc => return Err(format!("incomplete escape sequence on l. {}", linum)),
+        _ => (),
+    }
+    parsed.push_str(&regex::escape(&buffer));
+
+    Ok(parsed)
+}
+
 #[cfg(test)]
 mod test {
-    use super::Config;
+    use super::{parse_license_template, Config};
 
     #[test]
     fn test_config_set() {
@@ -211,6 +349,43 @@ fn test_was_set() {
         assert_eq!(config.was_set().verbose(), false);
     }
 
+    #[test]
+    fn test_parse_license_template() {
+        assert_eq!(
+            parse_license_template("literal (.*)").unwrap(),
+            r"^literal \(\.\*\)"
+        );
+        assert_eq!(
+            parse_license_template(r"escaping \}").unwrap(),
+            r"^escaping \}"
+        );
+        assert!(parse_license_template("unbalanced } without escape").is_err());
+        assert_eq!(
+            parse_license_template(r"{\d+} place{-?}holder{s?}").unwrap(),
+            r"^\d+ place-?holders?"
+        );
+        assert_eq!(
+            parse_license_template("default {}").unwrap(),
+            "^default .*?"
+        );
+        assert_eq!(
+            parse_license_template(r"unbalanced nested braces {\{{3}}").unwrap(),
+            r"^unbalanced nested braces \{{3}"
+        );
+        assert_eq!(
+            parse_license_template("parsing error }").unwrap_err(),
+            "escape or balance closing brace on l. 1"
+        );
+        assert_eq!(
+            parse_license_template("parsing error {\nsecond line").unwrap_err(),
+            "escape or balance opening brace on l. 1"
+        );
+        assert_eq!(
+            parse_license_template(r"parsing error \").unwrap_err(),
+            "incomplete escape sequence on l. 1"
+        );
+    }
+
     // FIXME(#2183) these tests cannot be run in parallel because they use env vars
     // #[test]
     // fn test_as_not_nightly_channel() {
index 6771a2ab79406598f0bb26707b652ba2df500384..e53f8bfb90762afda4e0d940932eb1f2eadd4bf7 100644 (file)
@@ -43,7 +43,6 @@
 use syntax::codemap::{CodeMap, FilePathMapping};
 pub use syntax::codemap::FileName;
 use syntax::parse::{self, ParseSess};
-use regex::Regex;
 
 use checkstyle::{output_footer, output_header};
 use comment::{CharClasses, FullCodeCharKind};
@@ -102,8 +101,6 @@ pub enum ErrorKind {
     BadIssue(Issue),
     // License check has failed
     LicenseCheck,
-    // License template could not be parsed
-    ParsingLicense,
 }
 
 impl fmt::Display for ErrorKind {
@@ -117,7 +114,6 @@ fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
             ErrorKind::TrailingWhitespace => write!(fmt, "left behind trailing whitespace"),
             ErrorKind::BadIssue(issue) => write!(fmt, "found {}", issue),
             ErrorKind::LicenseCheck => write!(fmt, "license check failed"),
-            ErrorKind::ParsingLicense => write!(fmt, "parsing regex in license template failed"),
         }
     }
 }
@@ -136,8 +132,7 @@ fn msg_prefix(&self) -> &str {
         match self.kind {
             ErrorKind::LineOverflow(..)
             | ErrorKind::TrailingWhitespace
-            | ErrorKind::LicenseCheck
-            | ErrorKind::ParsingLicense => "error:",
+            | ErrorKind::LicenseCheck => "error:",
             ErrorKind::BadIssue(_) => "WARNING:",
         }
     }
@@ -415,82 +410,6 @@ fn should_report_error(
     }
 }
 
-fn check_license(text: &str, license_template: &str) -> Result<bool, regex::Error> {
-    // the template is parsed using a state machine
-    enum State {
-        Lit,
-        LitEsc,
-        // the u32 keeps track of brace nesting
-        Re(u32),
-        ReEsc(u32),
-    }
-
-    let mut template_re = String::from("^");
-    let mut buffer = String::new();
-    let mut state = State::Lit;
-    for chr in license_template.chars() {
-        state = match state {
-            State::Lit => match chr {
-                '{' => {
-                    template_re.push_str(&regex::escape(&buffer));
-                    buffer.clear();
-                    State::Re(1)
-                }
-                '}' => panic!("license template syntax error"),
-                '\\' => State::LitEsc,
-                _ => {
-                    buffer.push(chr);
-                    State::Lit
-                }
-            },
-            State::LitEsc => {
-                buffer.push(chr);
-                State::Lit
-            }
-            State::Re(brace_nesting) => {
-                match chr {
-                    '{' => {
-                        buffer.push(chr);
-                        State::Re(brace_nesting + 1)
-                    }
-                    '}' => {
-                        match brace_nesting {
-                            1 => {
-                                // default regex for empty placeholder {}
-                                if buffer.is_empty() {
-                                    buffer = ".*?".to_string();
-                                }
-                                template_re.push_str(&buffer);
-                                buffer.clear();
-                                State::Lit
-                            }
-                            _ => {
-                                buffer.push(chr);
-                                State::Re(brace_nesting - 1)
-                            }
-                        }
-                    }
-                    '\\' => {
-                        buffer.push(chr);
-                        State::ReEsc(brace_nesting)
-                    }
-                    _ => {
-                        buffer.push(chr);
-                        State::Re(brace_nesting)
-                    }
-                }
-            }
-            State::ReEsc(brace_nesting) => {
-                buffer.push(chr);
-                State::Re(brace_nesting)
-            }
-        }
-    }
-    template_re.push_str(&regex::escape(&buffer));
-    let template_re = Regex::new(&template_re)?;
-    Ok(template_re.is_match(text))
-}
-
 // Formatting done on a char by char or line by line basis.
 // FIXME(#20) other stuff for parity with make tidy
 fn format_lines(
@@ -513,28 +432,15 @@ fn format_lines(
     let allow_issue_seek = !issue_seeker.is_disabled();
 
     // Check license.
-    if config.was_set().license_template() {
-        match check_license(text, &config.license_template()) {
-            Ok(check) => {
-                if !check {
-                    errors.push(FormattingError {
-                        line: cur_line,
-                        kind: ErrorKind::LicenseCheck,
-                        is_comment: false,
-                        is_string: false,
-                        line_buffer: String::new(),
-                    });
-                }
-            }
-            Err(_) => {
-                errors.push(FormattingError {
-                    line: cur_line,
-                    kind: ErrorKind::ParsingLicense,
-                    is_comment: false,
-                    is_string: false,
-                    line_buffer: String::new(),
-                });
-            }
+    if let Some(ref license_template) = config.license_template {
+        if !license_template.is_match(text) {
+            errors.push(FormattingError {
+                line: cur_line,
+                kind: ErrorKind::LicenseCheck,
+                is_comment: false,
+                is_string: false,
+                line_buffer: String::new(),
+            });
         }
     }
 
@@ -964,7 +870,7 @@ pub fn run(input: Input, config: &Config) -> Summary {
 
 #[cfg(test)]
 mod test {
-    use super::{check_license, format_code_block, format_snippet, Config};
+    use super::{format_code_block, format_snippet, Config};
 
     #[test]
     fn test_no_panic_on_format_snippet_and_format_code_block() {
@@ -1050,39 +956,4 @@ fn test_format_code_block() {
 };";
         assert!(test_format_inner(format_code_block, code_block, expected));
     }
-
-    #[test]
-    fn test_check_license() {
-        assert!(check_license("literal matching", "literal matching").unwrap());
-        assert!(!check_license("literal no match", "literal matching").unwrap());
-        assert!(
-            check_license(
-                "Regex start and end: 2018",
-                r"{[Rr]egex} start {} end: {\d+}"
-            ).unwrap()
-        );
-        assert!(!check_license(
-            "Regex start and end no match: 2018",
-            r"{[Rr]egex} start {} end: {\d+}"
-        ).unwrap());
-        assert!(
-            check_license(
-                "Regex in the middle: 2018 (tm)",
-                r"Regex {} middle: {\d+} (tm)"
-            ).unwrap()
-        );
-        assert!(!check_license(
-            "Regex in the middle no match: 2018 (tm)",
-            r"Regex {} middle: {\d+} (tm)"
-        ).unwrap());
-        assert!(!check_license("default doesn't match\nacross lines", "default {} lines").unwrap());
-        assert!(check_license("", "this is not a valid {[regex}").is_err());
-        assert!(
-            check_license(
-                "parse unbalanced nested delimiters{{{",
-                r"parse unbalanced nested delimiters{\{{3}}"
-            ).unwrap()
-        );
-        assert!(check_license("escaping }", r"escaping \}").unwrap());
-    }
 }