]> git.lizzy.rs Git - rust.git/blobdiff - clippy_lints/src/doc.rs
remove stars at the beginning of multiline comments
[rust.git] / clippy_lints / src / doc.rs
index 4183632543b7a91fb2dc377b94c1d4a491bbf324..6dbf2ea959e6a667f1be86e3a52079e48a3bf570 100644 (file)
@@ -1,6 +1,9 @@
+use itertools::Itertools;
+use pulldown_cmark;
 use rustc::lint::*;
 use syntax::ast;
 use syntax::codemap::{Span, BytePos};
+use syntax_pos::Pos;
 use utils::span_lint;
 
 /// **What it does:** Checks for the presence of `_`, `::` or camel-case words
@@ -16,7 +19,8 @@
 ///
 /// **Examples:**
 /// ```rust
-/// /// Do something with the foo_bar parameter. See also that::other::module::foo.
+/// /// Do something with the foo_bar parameter. See also
+/// that::other::module::foo.
 /// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
 /// fn doit(foo_bar) { .. }
 /// ```
@@ -53,328 +57,217 @@ fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) {
     }
 }
 
+struct Parser<'a> {
+    parser: pulldown_cmark::Parser<'a>,
+}
+
+impl<'a> Parser<'a> {
+    fn new(parser: pulldown_cmark::Parser<'a>) -> Parser<'a> {
+        Self { parser: parser }
+    }
+}
+
+impl<'a> Iterator for Parser<'a> {
+    type Item = (usize, pulldown_cmark::Event<'a>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let offset = self.parser.get_offset();
+        self.parser.next().map(|event| (offset, event))
+    }
+}
+
 /// Cleanup documentation decoration (`///` and such).
 ///
 /// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
-/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we need to keep track of
-/// the span but this function is inspired from the later.
+/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we
+/// need to keep track of
+/// the spans but this function is inspired from the later.
 #[allow(cast_possible_truncation)]
-pub fn strip_doc_comment_decoration((comment, span): (String, Span)) -> Vec<(String, Span)> {
+pub fn strip_doc_comment_decoration(comment: &str, span: Span) -> (String, Vec<(usize, Span)>) {
     // one-line comments lose their prefix
     const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
     for prefix in ONELINERS {
         if comment.starts_with(*prefix) {
-            return vec![(
-                comment[prefix.len()..].to_owned(),
-                Span { lo: span.lo + BytePos(prefix.len() as u32), ..span }
-            )];
+            let doc = &comment[prefix.len()..];
+            let mut doc = doc.to_owned();
+            doc.push('\n');
+            return (
+                doc.to_owned(),
+                vec![
+                    (
+                        doc.len(),
+                        Span {
+                            lo: span.lo + BytePos(prefix.len() as u32),
+                            ..span
+                        }
+                    ),
+                ],
+            );
         }
     }
 
     if comment.starts_with("/*") {
-        return comment[3..comment.len() - 2].lines().map(|line| {
+        let doc = &comment[3..comment.len() - 2];
+        let mut sizes = vec![];
+        let mut contains_initial_stars = false;
+        for line in doc.lines() {
             let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
             debug_assert_eq!(offset as u32 as usize, offset);
-
-            (
-                line.to_owned(),
+            contains_initial_stars |= line.trim_left().starts_with('*');
+            // +1 for the newline
+            sizes.push((
+                line.len() + 1,
                 Span {
                     lo: span.lo + BytePos(offset as u32),
                     ..span
+                },
+            ));
+        }
+        if !contains_initial_stars {
+            return (doc.to_string(), sizes);
+        }
+        // remove the initial '*'s if any
+        let mut no_stars = String::with_capacity(doc.len());
+        for line in doc.lines() {
+            let mut chars = line.chars();
+            while let Some(c) = chars.next() {
+                if c.is_whitespace() {
+                    no_stars.push(c);
+                } else {
+                    no_stars.push(if c == '*' { ' ' } else { c });
+                    break;
                 }
-            )
-        }).collect();
+            }
+            no_stars.push_str(chars.as_str());
+            no_stars.push('\n');
+        }
+        return (no_stars, sizes);
     }
 
     panic!("not a doc-comment: {}", comment);
 }
 
 pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
-    let mut docs = vec![];
+    let mut doc = String::new();
+    let mut spans = vec![];
 
     for attr in attrs {
         if attr.is_sugared_doc {
-            if let ast::MetaItemKind::NameValue(ref doc) = attr.value.node {
-                if let ast::LitKind::Str(ref doc, _) = doc.node {
-                    let doc = (*doc.as_str()).to_owned();
-                    docs.extend_from_slice(&strip_doc_comment_decoration((doc, attr.span)));
-                }
+            if let Some(ref current) = attr.value_str() {
+                let current = current.to_string();
+                let (current, current_spans) = strip_doc_comment_decoration(&current, attr.span);
+                spans.extend_from_slice(&current_spans);
+                doc.push_str(&current);
+            }
+        } else if let Some(name) = attr.name() {
+            // ignore mix of sugared and non-sugared doc
+            if name == "doc" {
+                return;
             }
         }
     }
 
-    if !docs.is_empty() {
-        let _ = check_doc(cx, valid_idents, &docs);
-    }
-}
-
-#[allow(while_let_loop)] // #362
-fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(String, Span)]) -> Result<(), ()> {
-    // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
-    // There really is no markdown specification that would disambiguate this properly. This is
-    // what GitHub and Rustdoc do:
-    //
-    // foo_bar test_quz    → foo_bar test_quz
-    // foo_bar_baz         → foo_bar_baz (note that the “official” spec says this should be emphasized)
-    // _foo bar_ test_quz_ → <em>foo bar</em> test_quz_
-    // \_foo bar\_         → _foo bar_
-    // (_baz_)             → (<em>baz</em>)
-    // foo _ bar _ baz     → foo _ bar _ baz
-
-    /// Character that can appear in a path
-    fn is_path_char(c: char) -> bool {
-        match c {
-            t if t.is_alphanumeric() => true,
-            ':' | '_' => true,
-            _ => false,
-        }
-    }
-
-    #[derive(Clone, Debug)]
-    /// This type is used to iterate through the documentation characters, keeping the span at the
-    /// same time.
-    struct Parser<'a> {
-        /// First byte of the current potential match
-        current_word_begin: usize,
-        /// List of lines and their associated span
-        docs: &'a [(String, Span)],
-        /// Index of the current line we are parsing
-        line: usize,
-        /// Whether we are in a link
-        link: bool,
-        /// Whether we are at the beginning of a line
-        new_line: bool,
-        /// Whether we were to the end of a line last time `next` was called
-        reset: bool,
-        /// The position of the current character within the current line
-        pos: usize,
+    let mut current = 0;
+    for &mut (ref mut offset, _) in &mut spans {
+        let offset_copy = *offset;
+        *offset = current;
+        current += offset_copy;
     }
 
-    impl<'a> Parser<'a> {
-        fn advance_begin(&mut self) {
-            self.current_word_begin = self.pos;
-        }
-
-        fn line(&self) -> (&'a str, Span) {
-            let (ref doc, span) = self.docs[self.line];
-            (doc, span)
-        }
-
-        fn peek(&self) -> Option<char> {
-            self.line().0[self.pos..].chars().next()
-        }
-
-        #[allow(while_let_on_iterator)] // borrowck complains about for
-        fn jump_to(&mut self, n: char) -> Result<bool, ()> {
-            while let Some((new_line, c)) = self.next() {
-                if c == n {
-                    self.advance_begin();
-                    return Ok(new_line);
-                }
+    if !doc.is_empty() {
+        let parser = Parser::new(pulldown_cmark::Parser::new(&doc));
+        let parser = parser.coalesce(|x, y| {
+            use pulldown_cmark::Event::*;
+
+            let x_offset = x.0;
+            let y_offset = y.0;
+
+            match (x.1, y.1) {
+                (Text(x), Text(y)) => {
+                    let mut x = x.into_owned();
+                    x.push_str(&y);
+                    Ok((x_offset, Text(x.into())))
+                },
+                (x, y) => Err(((x_offset, x), (y_offset, y))),
             }
-
-            Err(())
-        }
-
-        fn next_line(&mut self) {
-            self.pos = 0;
-            self.current_word_begin = 0;
-            self.line += 1;
-            self.new_line = true;
-        }
-
-        fn put_back(&mut self, c: char) {
-            self.pos -= c.len_utf8();
-        }
-
-        #[allow(cast_possible_truncation)]
-        fn word(&self) -> (&'a str, Span) {
-            let begin = self.current_word_begin;
-            let end = self.pos;
-
-            debug_assert_eq!(end as u32 as usize, end);
-            debug_assert_eq!(begin as u32 as usize, begin);
-
-            let (doc, mut span) = self.line();
-            span.hi = span.lo + BytePos(end as u32);
-            span.lo = span.lo + BytePos(begin as u32);
-
-            (&doc[begin..end], span)
-        }
+        });
+        check_doc(cx, valid_idents, parser, &spans);
     }
+}
 
-    impl<'a> Iterator for Parser<'a> {
-        type Item = (bool, char);
-
-        fn next(&mut self) -> Option<(bool, char)> {
-            while self.line < self.docs.len() {
-                if self.reset {
-                    self.line += 1;
-                    self.reset = false;
-                    self.pos = 0;
-                    self.current_word_begin = 0;
-                }
-
-                let mut chars = self.line().0[self.pos..].chars();
-                let c = chars.next();
-
-                if let Some(c) = c {
-                    self.pos += c.len_utf8();
-                    let new_line = self.new_line;
-                    self.new_line = c == '\n' || (self.new_line && c.is_whitespace());
-                    return Some((new_line, c));
-                } else if self.line == self.docs.len() - 1 {
-                    return None;
-                } else {
-                    self.new_line = true;
-                    self.reset = true;
-                    self.pos += 1;
-                    return Some((true, '\n'));
+fn check_doc<'a, Events: Iterator<Item = (usize, pulldown_cmark::Event<'a>)>>(
+    cx: &EarlyContext,
+    valid_idents: &[String],
+    docs: Events,
+    spans: &[(usize, Span)],
+) {
+    use pulldown_cmark::Event::*;
+    use pulldown_cmark::Tag::*;
+
+    let mut in_code = false;
+
+    for (offset, event) in docs {
+        match event {
+            Start(CodeBlock(_)) |
+            Start(Code) => in_code = true,
+            End(CodeBlock(_)) |
+            End(Code) => in_code = false,
+            Start(_tag) | End(_tag) => (), // We don't care about other tags
+            Html(_html) |
+            InlineHtml(_html) => (), // HTML is weird, just ignore it
+            SoftBreak => (),
+            HardBreak => (),
+            FootnoteReference(text) |
+            Text(text) => {
+                if !in_code {
+                    let index = match spans.binary_search_by(|c| c.0.cmp(&offset)) {
+                        Ok(o) => o,
+                        Err(e) => e - 1,
+                    };
+
+                    let (begin, span) = spans[index];
+
+                    // Adjust for the begining of the current `Event`
+                    let span = Span {
+                        lo: span.lo + BytePos::from_usize(offset - begin),
+                        ..span
+                    };
+
+                    check_text(cx, valid_idents, &text, span);
                 }
-            }
-
-            None
+            },
         }
     }
+}
 
-    let mut parser = Parser {
-        current_word_begin: 0,
-        docs: docs,
-        line: 0,
-        link: false,
-        new_line: true,
-        reset: false,
-        pos: 0,
-    };
-
-    /// Check for fanced code block.
-    macro_rules! check_block {
-        ($parser:expr, $c:tt, $new_line:expr) => {{
-            check_block!($parser, $c, $c, $new_line)
-        }};
-
-        ($parser:expr, $c:pat, $c_expr:expr, $new_line:expr) => {{
-            fn check_block(parser: &mut Parser, new_line: bool) -> Result<bool, ()> {
-                if new_line {
-                    let mut lookup_parser = parser.clone();
-                    if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
-                        *parser = lookup_parser;
-                        // 3 or more ` or ~ open a code block to be closed with the same number of ` or ~
-                        let mut open_count = 3;
-                        while let Some((false, $c)) = parser.next() {
-                            open_count += 1;
-                        }
-
-                        loop {
-                            loop {
-                                if try!(parser.jump_to($c_expr)) {
-                                    break;
-                                }
-                            }
-
-                            lookup_parser = parser.clone();
-                            if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
-                                let mut close_count = 3;
-                                while let Some((false, $c)) = lookup_parser.next() {
-                                    close_count += 1;
-                                }
-
-                                if close_count == open_count {
-                                    *parser = lookup_parser;
-                                    return Ok(true);
-                                }
-                            }
-                        }
-                    }
-                }
-
-                Ok(false)
-            }
-
-            check_block(&mut $parser, $new_line)
-        }};
-    }
-
-    loop {
-        match parser.next() {
-            Some((new_line, c)) => {
-                match c {
-                    '#' if new_line => { // don’t warn on titles
-                        parser.next_line();
-                    }
-                    '`' => {
-                        if try!(check_block!(parser, '`', new_line)) {
-                            continue;
-                        }
-
-                        try!(parser.jump_to('`')); // not a code block, just inline code
-                    }
-                    '~' => {
-                        if try!(check_block!(parser, '~', new_line)) {
-                            continue;
-                        }
-
-                        // ~ does not introduce inline code, but two of them introduce
-                        // strikethrough. Too bad for the consistency but we don't care about
-                        // strikethrough.
-                    }
-                    '[' => {
-                        // Check for a reference definition `[foo]:` at the beginning of a line
-                        let mut link = true;
-
-                        if new_line {
-                            let mut lookup_parser = parser.clone();
-                            if lookup_parser.any(|(_, c)| c == ']') {
-                                if let Some((_, ':')) = lookup_parser.next() {
-                                    lookup_parser.next_line();
-                                    parser = lookup_parser;
-                                    link = false;
-                                }
-                            }
-                        }
+fn check_text(cx: &EarlyContext, valid_idents: &[String], text: &str, span: Span) {
+    for word in text.split_whitespace() {
+        // Trim punctuation as in `some comment (see foo::bar).`
+        //                                                   ^^
+        // Or even as in `_foo bar_` which is emphasized.
+        let word = word.trim_matches(|c: char| !c.is_alphanumeric());
 
-                        parser.advance_begin();
-                        parser.link = link;
-                    }
-                    ']' if parser.link => {
-                        parser.link = false;
-
-                        match parser.peek() {
-                            Some('(') => {
-                                try!(parser.jump_to(')'));
-                            }
-                            Some('[') => {
-                                try!(parser.jump_to(']'));
-                            }
-                            Some(_) => continue,
-                            None => return Err(()),
-                        }
-                    }
-                    c if !is_path_char(c) => {
-                        parser.advance_begin();
-                    }
-                    _ => {
-                        if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) {
-                            parser.put_back(c);
-                        }
+        if valid_idents.iter().any(|i| i == word) {
+            continue;
+        }
 
-                        let (word, span) = parser.word();
-                        check_word(cx, valid_idents, word, span);
-                        parser.advance_begin();
-                    }
-                }
+        // Adjust for the current word
+        let offset = word.as_ptr() as usize - text.as_ptr() as usize;
+        let span = Span {
+            lo: span.lo + BytePos::from_usize(offset),
+            hi: span.lo + BytePos::from_usize(offset + word.len()),
+            ..span
+        };
 
-            }
-            None => break,
-        }
+        check_word(cx, word, span);
     }
-
-    Ok(())
 }
 
-fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
-    /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is
-    /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
+fn check_word(cx: &EarlyContext, word: &str, span: Span) {
+    /// Checks if a string is camel-case, ie. contains at least two uppercase
+    /// letter (`Clippy` is
+    /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded
+    /// (`IDs` is ok).
     fn is_camel_case(s: &str) -> bool {
         if s.starts_with(|c: char| c.is_digit(10)) {
             return false;
@@ -386,28 +279,20 @@ fn is_camel_case(s: &str) -> bool {
             s
         };
 
-        s.chars().all(char::is_alphanumeric) &&
-        s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
-        s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
+        s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
+            s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
     }
 
     fn has_underscore(s: &str) -> bool {
         s != "_" && !s.contains("\\_") && s.contains('_')
     }
 
-    // Trim punctuation as in `some comment (see foo::bar).`
-    //                                                   ^^
-    // Or even as in `_foo bar_` which is emphasized.
-    let word = word.trim_matches(|c: char| !c.is_alphanumeric());
-
-    if valid_idents.iter().any(|i| i == word) {
-        return;
-    }
-
     if has_underscore(word) || word.contains("::") || is_camel_case(word) {
-        span_lint(cx,
-                  DOC_MARKDOWN,
-                  span,
-                  &format!("you should put `{}` between ticks in the documentation", word));
+        span_lint(
+            cx,
+            DOC_MARKDOWN,
+            span,
+            &format!("you should put `{}` between ticks in the documentation", word),
+        );
     }
 }