git.lizzy.rs Git - rust.git/blobdiff - src/comment.rs
Merge pull request #3035 from topecongiro/issue-3006
[rust.git] / src / comment.rs
index 378fe40d569890a30aad1289f055f2a75e274daa..17ec29c3cbc575788748ebee2b76a1480658c788 100644 (file)
 use std::{self, borrow::Cow, iter};
 
 use itertools::{multipeek, MultiPeek};
-use syntax::codemap::Span;
+use syntax::source_map::Span;
 
 use config::Config;
 use rewrite::RewriteContext;
 use shape::{Indent, Shape};
 use string::{rewrite_string, StringFormat};
 use utils::{count_newlines, first_line_width, last_line_width};
+use {ErrorKind, FormattingError};
 
 fn is_custom_comment(comment: &str) -> bool {
     if !comment.starts_with("//") {
@@ -46,7 +47,7 @@ fn custom_opener(s: &str) -> &str {
     s.lines().next().map_or("", |first_line| {
         first_line
             .find(' ')
-            .map_or(first_line, |space_index| &first_line[0..space_index + 1])
+            .map_or(first_line, |space_index| &first_line[0..=space_index])
     })
 }
 
@@ -95,21 +96,6 @@ pub fn line_start(&self) -> &'a str {
     pub fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
         (self.opener(), self.closer(), self.line_start())
     }
-
-    pub fn line_with_same_comment_style(&self, line: &str, normalize_comments: bool) -> bool {
-        match *self {
-            CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
-                line.trim_left().starts_with(self.line_start().trim_left())
-                    || comment_style(line, normalize_comments) == *self
-            }
-            CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
-                line.trim_left().starts_with(self.closer().trim_left())
-                    || line.trim_left().starts_with(self.line_start().trim_left())
-                    || comment_style(line, normalize_comments) == *self
-            }
-            CommentStyle::Custom(opener) => line.trim_left().starts_with(opener.trim_right()),
-        }
-    }
 }
 
 fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle {
@@ -249,7 +235,8 @@ fn _rewrite_comment(
     // If there are lines without a starting sigil, we won't format them correctly
     // so in that case we won't even re-align (if !config.normalize_comments()) and
     // we should stop now.
-    let num_bare_lines = orig.lines()
+    let num_bare_lines = orig
+        .lines()
         .map(|line| line.trim())
         .filter(|l| !(l.starts_with('*') || l.starts_with("//") || l.starts_with("/*")))
         .count();
@@ -271,17 +258,56 @@ fn identify_comment(
     is_doc_comment: bool,
 ) -> Option<String> {
     let style = comment_style(orig, false);
-    let first_group = orig.lines()
-        .take_while(|l| style.line_with_same_comment_style(l, false))
-        .collect::<Vec<_>>()
-        .join("\n");
-    let rest = orig.lines()
-        .skip(first_group.lines().count())
-        .collect::<Vec<_>>()
-        .join("\n");
+    let mut first_group_ending = 0;
+
+    fn compute_len(orig: &str, line: &str) -> usize {
+        if orig.len() > line.len() {
+            if orig.as_bytes()[line.len()] == b'\r' {
+                line.len() + 2
+            } else {
+                line.len() + 1
+            }
+        } else {
+            line.len()
+        }
+    }
 
+    match style {
+        CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
+            let line_start = style.line_start().trim_left();
+            for line in orig.lines() {
+                if line.trim_left().starts_with(line_start) || comment_style(line, false) == style {
+                    first_group_ending += compute_len(&orig[first_group_ending..], line);
+                } else {
+                    break;
+                }
+            }
+        }
+        CommentStyle::Custom(opener) => {
+            let trimmed_opener = opener.trim_right();
+            for line in orig.lines() {
+                if line.trim_left().starts_with(trimmed_opener) {
+                    first_group_ending += compute_len(&orig[first_group_ending..], line);
+                } else {
+                    break;
+                }
+            }
+        }
+        // for a block comment, search for the closing symbol
+        CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
+            let closer = style.closer().trim_left();
+            for line in orig.lines() {
+                first_group_ending += compute_len(&orig[first_group_ending..], line);
+                if line.trim_left().ends_with(closer) {
+                    break;
+                }
+            }
+        }
+    }
+
+    let (first_group, rest) = orig.split_at(first_group_ending);
     let first_group_str = rewrite_comment_inner(
-        &first_group,
+        first_group,
         block_style,
         style,
         shape,
@@ -291,7 +317,7 @@ fn identify_comment(
     if rest.is_empty() {
         Some(first_group_str)
     } else {
-        identify_comment(&rest, block_style, shape, config, is_doc_comment).map(|rest_str| {
+        identify_comment(rest, block_style, shape, config, is_doc_comment).map(|rest_str| {
             format!(
                 "{}\n{}{}",
                 first_group_str,
@@ -333,7 +359,8 @@ fn rewrite_comment_inner(
     };
 
     let line_breaks = count_newlines(orig.trim_right());
-    let lines = orig.lines()
+    let lines = orig
+        .lines()
         .enumerate()
         .map(|(i, mut line)| {
             line = trim_right_unless_two_whitespaces(line.trim_left(), is_doc_comment);
@@ -343,8 +370,7 @@ fn rewrite_comment_inner(
             }
 
             line
-        })
-        .map(|s| left_trim_comment_line(s, &style))
+        }).map(|s| left_trim_comment_line(s, &style))
         .map(|(line, has_leading_whitespace)| {
             if orig.starts_with("/*") && line_breaks == 0 {
                 (
@@ -383,14 +409,20 @@ fn rewrite_comment_inner(
             if line.starts_with("```") {
                 inside_code_block = false;
                 result.push_str(&comment_line_separator);
-                let code_block = ::format_code_block(&code_block_buffer, config)
-                    .unwrap_or_else(|| code_block_buffer.to_owned());
+                let code_block = {
+                    let mut config = config.clone();
+                    config.set().wrap_comments(false);
+                    match ::format_code_block(&code_block_buffer, &config) {
+                        Some(ref s) => trim_custom_comment_prefix(s),
+                        None => trim_custom_comment_prefix(&code_block_buffer),
+                    }
+                };
                 result.push_str(&join_code_block_with_comment_line_separator(&code_block));
                 code_block_buffer.clear();
                 result.push_str(&comment_line_separator);
                 result.push_str(line);
             } else {
-                code_block_buffer.push_str(line);
+                code_block_buffer.push_str(&hide_sharp_behind_comment(line));
                 code_block_buffer.push('\n');
 
                 if is_last {
@@ -398,14 +430,14 @@ fn rewrite_comment_inner(
                     // We will leave them untouched.
                     result.push_str(&comment_line_separator);
                     result.push_str(&join_code_block_with_comment_line_separator(
-                        &code_block_buffer,
+                        &trim_custom_comment_prefix(&code_block_buffer),
                     ));
                 }
             }
 
             continue;
         } else {
-            inside_code_block = line.starts_with("```rust");
+            inside_code_block = line.starts_with("```");
 
             if result == opener {
                 let force_leading_whitespace = opener == "/* " && count_newlines(orig) == 0;
@@ -417,8 +449,12 @@ fn rewrite_comment_inner(
                 }
             } else if is_prev_line_multi_line && !line.is_empty() {
                 result.push(' ')
-            } else if is_last && !closer.is_empty() && line.is_empty() {
-                result.push_str(&indent_str);
+            } else if is_last && line.is_empty() {
+                // trailing blank lines are unwanted
+                if !closer.is_empty() {
+                    result.push_str(&indent_str);
+                }
+                break;
             } else {
                 result.push_str(&comment_line_separator);
                 if !has_leading_whitespace && result.ends_with(' ') {
@@ -428,7 +464,7 @@ fn rewrite_comment_inner(
         }
 
         if config.wrap_comments() && line.len() > fmt.shape.width && !has_url(line) {
-            match rewrite_string(line, &fmt, Some(max_chars)) {
+            match rewrite_string(line, &fmt) {
                 Some(ref s) => {
                     is_prev_line_multi_line = s.contains('\n');
                     result.push_str(s);
@@ -439,7 +475,7 @@ fn rewrite_comment_inner(
                     result.pop();
                     result.push_str(&comment_line_separator);
                     fmt.shape = Shape::legacy(max_chars, fmt_indent);
-                    match rewrite_string(line, &fmt, Some(max_chars)) {
+                    match rewrite_string(line, &fmt) {
                         Some(ref s) => {
                             is_prev_line_multi_line = s.contains('\n');
                             result.push_str(s);
@@ -460,7 +496,7 @@ fn rewrite_comment_inner(
                 // 1 = " "
                 let offset = 1 + last_line_width(&result) - line_start.len();
                 Shape {
-                    width: max_chars.checked_sub(offset).unwrap_or(0),
+                    width: max_chars.saturating_sub(offset),
                     indent: fmt_indent,
                     offset: fmt.shape.offset + offset,
                 }
@@ -487,6 +523,29 @@ fn rewrite_comment_inner(
     Some(result)
 }
 
+const RUSTFMT_CUSTOM_COMMENT_PREFIX: &str = "//#### ";
+
+fn hide_sharp_behind_comment(s: &str) -> Cow<str> {
+    if s.trim_left().starts_with("# ") {
+        Cow::from(format!("{}{}", RUSTFMT_CUSTOM_COMMENT_PREFIX, s))
+    } else {
+        Cow::from(s)
+    }
+}
+
+fn trim_custom_comment_prefix(s: &str) -> String {
+    s.lines()
+        .map(|line| {
+            let left_trimmed = line.trim_left();
+            if left_trimmed.starts_with(RUSTFMT_CUSTOM_COMMENT_PREFIX) {
+                left_trimmed.trim_left_matches(RUSTFMT_CUSTOM_COMMENT_PREFIX)
+            } else {
+                line
+            }
+        }).collect::<Vec<_>>()
+        .join("\n")
+}
+
 /// Returns true if the given string MAY include URLs or alike.
 fn has_url(s: &str) -> bool {
     // This function may return false positive, but should get its job done in most cases.
@@ -553,7 +612,8 @@ fn light_rewrite_comment(
     config: &Config,
     is_doc_comment: bool,
 ) -> Option<String> {
-    let lines: Vec<&str> = orig.lines()
+    let lines: Vec<&str> = orig
+        .lines()
         .map(|l| {
             // This is basically just l.trim(), but in the case that a line starts
             // with `*` we want to leave one space before it, so it aligns with the
@@ -570,8 +630,7 @@ fn light_rewrite_comment(
             };
             // Preserve markdown's double-space line break syntax in doc comment.
             trim_right_unless_two_whitespaces(left_trimmed, is_doc_comment)
-        })
-        .collect();
+        }).collect();
     Some(lines.join(&format!("\n{}", offset.to_string(config))))
 }
 
@@ -579,7 +638,9 @@ fn light_rewrite_comment(
 /// Does not trim all whitespace. If a single space is trimmed from the left of the string,
 /// this function returns true.
 fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle) -> (&'a str, bool) {
-    if line.starts_with("//! ") || line.starts_with("/// ") || line.starts_with("/*! ")
+    if line.starts_with("//! ")
+        || line.starts_with("/// ")
+        || line.starts_with("/*! ")
         || line.starts_with("/** ")
     {
         (&line[4..], true)
@@ -589,13 +650,18 @@ fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle) -> (&'a str,
         } else {
             (&line[opener.trim_right().len()..], false)
         }
-    } else if line.starts_with("/* ") || line.starts_with("// ") || line.starts_with("//!")
-        || line.starts_with("///") || line.starts_with("** ")
+    } else if line.starts_with("/* ")
+        || line.starts_with("// ")
+        || line.starts_with("//!")
+        || line.starts_with("///")
+        || line.starts_with("** ")
         || line.starts_with("/*!")
         || (line.starts_with("/**") && !line.starts_with("/**/"))
     {
         (&line[3..], line.chars().nth(2).unwrap() == ' ')
-    } else if line.starts_with("/*") || line.starts_with("* ") || line.starts_with("//")
+    } else if line.starts_with("/*")
+        || line.starts_with("* ")
+        || line.starts_with("//")
         || line.starts_with("**")
     {
         (&line[2..], line.chars().nth(1).unwrap() == ' ')
@@ -719,6 +785,9 @@ enum CharClassesStatus {
     Normal,
     LitString,
     LitStringEscape,
+    LitRawString(u32),
+    RawStringPrefix(u32),
+    RawStringSuffix(u32),
     LitChar,
     LitCharEscape,
     // The u32 is the nesting deepness of the comment
@@ -752,13 +821,17 @@ pub enum FullCodeCharKind {
     InComment,
     /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
     EndComment,
+    /// Start of a multiline string
+    StartString,
+    /// End of a multiline string
+    EndString,
     /// Inside a string.
     InString,
 }
 
 impl FullCodeCharKind {
-    pub fn is_comment(&self) -> bool {
-        match *self {
+    pub fn is_comment(self) -> bool {
+        match self {
             FullCodeCharKind::StartComment
             | FullCodeCharKind::InComment
             | FullCodeCharKind::EndComment => true,
@@ -766,11 +839,11 @@ pub fn is_comment(&self) -> bool {
         }
     }
 
-    pub fn is_string(&self) -> bool {
-        *self == FullCodeCharKind::InString
+    pub fn is_string(self) -> bool {
+        self == FullCodeCharKind::InString || self == FullCodeCharKind::StartString
     }
 
-    fn to_codecharkind(&self) -> CodeCharKind {
+    fn to_codecharkind(self) -> CodeCharKind {
         if self.is_comment() {
             CodeCharKind::Comment
         } else {
@@ -792,6 +865,20 @@ pub fn new(base: T) -> CharClasses<T> {
     }
 }
 
+fn is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool
+where
+    T: Iterator,
+    T::Item: RichChar,
+{
+    for _ in 0..count {
+        match iter.peek() {
+            Some(c) if c.get_char() == '#' => continue,
+            _ => return false,
+        }
+    }
+    true
+}
+
 impl<T> Iterator for CharClasses<T>
 where
     T: Iterator,
@@ -804,17 +891,51 @@ fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
         let chr = item.get_char();
         let mut char_kind = FullCodeCharKind::Normal;
         self.status = match self.status {
-            CharClassesStatus::LitString => match chr {
-                '"' => CharClassesStatus::Normal,
-                '\\' => {
-                    char_kind = FullCodeCharKind::InString;
-                    CharClassesStatus::LitStringEscape
+            CharClassesStatus::LitRawString(sharps) => {
+                char_kind = FullCodeCharKind::InString;
+                match chr {
+                    '"' => {
+                        if sharps == 0 {
+                            char_kind = FullCodeCharKind::Normal;
+                            CharClassesStatus::Normal
+                        } else if is_raw_string_suffix(&mut self.base, sharps) {
+                            CharClassesStatus::RawStringSuffix(sharps)
+                        } else {
+                            CharClassesStatus::LitRawString(sharps)
+                        }
+                    }
+                    _ => CharClassesStatus::LitRawString(sharps),
                 }
-                _ => {
-                    char_kind = FullCodeCharKind::InString;
-                    CharClassesStatus::LitString
+            }
+            CharClassesStatus::RawStringPrefix(sharps) => {
+                char_kind = FullCodeCharKind::InString;
+                match chr {
+                    '#' => CharClassesStatus::RawStringPrefix(sharps + 1),
+                    '"' => CharClassesStatus::LitRawString(sharps),
+                    _ => CharClassesStatus::Normal, // Unreachable.
                 }
-            },
+            }
+            CharClassesStatus::RawStringSuffix(sharps) => {
+                match chr {
+                    '#' => {
+                        if sharps == 1 {
+                            CharClassesStatus::Normal
+                        } else {
+                            char_kind = FullCodeCharKind::InString;
+                            CharClassesStatus::RawStringSuffix(sharps - 1)
+                        }
+                    }
+                    _ => CharClassesStatus::Normal, // Unreachable
+                }
+            }
+            CharClassesStatus::LitString => {
+                char_kind = FullCodeCharKind::InString;
+                match chr {
+                    '"' => CharClassesStatus::Normal,
+                    '\\' => CharClassesStatus::LitStringEscape,
+                    _ => CharClassesStatus::LitString,
+                }
+            }
             CharClassesStatus::LitStringEscape => {
                 char_kind = FullCodeCharKind::InString;
                 CharClassesStatus::LitString
@@ -826,6 +947,13 @@ fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
             },
             CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
             CharClassesStatus::Normal => match chr {
+                'r' => match self.base.peek().map(|c| c.get_char()) {
+                    Some('#') | Some('"') => {
+                        char_kind = FullCodeCharKind::InString;
+                        CharClassesStatus::RawStringPrefix(0)
+                    }
+                    _ => CharClassesStatus::Normal,
+                },
                 '"' => {
                     char_kind = FullCodeCharKind::InString;
                     CharClassesStatus::LitString
@@ -921,15 +1049,26 @@ impl<'a> Iterator for LineClasses<'a> {
     type Item = (FullCodeCharKind, String);
 
     fn next(&mut self) -> Option<Self::Item> {
-        if self.base.peek().is_none() {
-            return None;
-        }
+        self.base.peek()?;
 
         let mut line = String::new();
 
+        let start_class = match self.base.peek() {
+            Some((kind, _)) => *kind,
+            None => FullCodeCharKind::Normal,
+        };
+
         while let Some((kind, c)) = self.base.next() {
-            self.kind = kind;
             if c == '\n' {
+                self.kind = match (start_class, kind) {
+                    (FullCodeCharKind::Normal, FullCodeCharKind::InString) => {
+                        FullCodeCharKind::StartString
+                    }
+                    (FullCodeCharKind::InString, FullCodeCharKind::Normal) => {
+                        FullCodeCharKind::EndString
+                    }
+                    _ => kind,
+                };
                 break;
             } else {
                 line.push(c);
@@ -1082,13 +1221,41 @@ pub fn recover_comment_removed(
 ) -> Option<String> {
     let snippet = context.snippet(span);
     if snippet != new && changed_comment_content(snippet, &new) {
-        // We missed some comments. Keep the original text.
+        // We missed some comments. Warn and keep the original text.
+        if context.config.error_on_unformatted() {
+            context.report.append(
+                context.source_map.span_to_filename(span).into(),
+                vec![FormattingError::from_span(
+                    span,
+                    &context.source_map,
+                    ErrorKind::LostComment,
+                )],
+            );
+        }
         Some(snippet.to_owned())
     } else {
         Some(new)
     }
 }
 
+pub fn filter_normal_code(code: &str) -> String {
+    let mut buffer = String::with_capacity(code.len());
+    LineClasses::new(code).for_each(|(kind, line)| match kind {
+        FullCodeCharKind::Normal
+        | FullCodeCharKind::StartString
+        | FullCodeCharKind::InString
+        | FullCodeCharKind::EndString => {
+            buffer.push_str(&line);
+            buffer.push('\n');
+        }
+        _ => (),
+    });
+    if !code.ends_with('\n') && buffer.ends_with('\n') {
+        buffer.pop();
+    }
+    buffer
+}
+
 /// Return true if the two strings of code have the same payload of comments.
 /// The payload of comments is everything in the string except:
 ///     - actual code (not comments)
@@ -1139,6 +1306,7 @@ fn new(comment: &'a str) -> CommentReducer<'a> {
 
 impl<'a> Iterator for CommentReducer<'a> {
     type Item = char;
+
     fn next(&mut self) -> Option<Self::Item> {
         loop {
             let mut c = self.iter.next()?;
@@ -1240,7 +1408,7 @@ fn comment_code_slices_three() {
     }
 
     #[test]
-    #[cfg_attr(rustfmt, rustfmt_skip)]
+    #[rustfmt::skip]
     fn format_comments() {
         let mut config: ::config::Config = Default::default();
         config.set().wrap_comments(true);
@@ -1287,8 +1455,7 @@ fn uncommented(text: &str) -> String {
             .filter_map(|(s, c)| match s {
                 FullCodeCharKind::Normal | FullCodeCharKind::InString => Some(c),
                 _ => None,
-            })
-            .collect()
+            }).collect()
     }
 
     #[test]
@@ -1339,7 +1506,24 @@ fn check(haystack: &str, needle: &str, expected: Option<usize>) {
 
     #[test]
     fn test_remove_trailing_white_spaces() {
-        let s = format!("    r#\"\n        test\n    \"#");
+        let s = "    r#\"\n        test\n    \"#";
         assert_eq!(remove_trailing_white_spaces(&s), s);
     }
+
+    #[test]
+    fn test_filter_normal_code() {
+        let s = r#"
+fn main() {
+    println!("hello, world");
+}
+"#;
+        assert_eq!(s, filter_normal_code(s));
+        let s_with_comment = r#"
+fn main() {
+    // hello, world
+    println!("hello, world");
+}
+"#;
+        assert_eq!(s, filter_normal_code(s_with_comment));
+    }
 }