remove stars at the beginning of multiline comments

[rust.git] / clippy_lints / src / doc.rs
diff --git a/clippy_lints/src/doc.rs b/clippy_lints/src/doc.rs

index af1fbaa0f7da1c727380b38d86a04acff9ba7283..6dbf2ea959e6a667f1be86e3a52079e48a3bf570 100644 (file)
--- a/clippy_lints/src/doc.rs
+++ b/clippy_lints/src/doc.rs
@@ -1,26 +1,33 @@
+use itertools::Itertools;
+use pulldown_cmark;
  use rustc::lint::*;
  use syntax::ast;
  use syntax::codemap::{Span, BytePos};
+use syntax_pos::Pos;
  use utils::span_lint;
  
-/// **What it does:** This lint checks for the presence of `_`, `::` or camel-case words outside
-/// ticks in documentation.
+/// **What it does:** Checks for the presence of `_`, `::` or camel-case words
+/// outside ticks in documentation.
  ///
-/// **Why is this bad?** *Rustdoc* supports markdown formatting, `_`, `::` and camel-case probably
-/// indicates some code which should be included between ticks. `_` can also be used for empasis in
-/// markdown, this lint tries to consider that.
+/// **Why is this bad?** *Rustdoc* supports markdown formatting, `_`, `::` and
+/// camel-case probably indicates some code which should be included between
+/// ticks. `_` can also be used for empasis in markdown, this lint tries to
+/// consider that.
  ///
-/// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks for is limited.
+/// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks
+/// for is limited, and there are still false positives.
  ///
  /// **Examples:**
  /// ```rust
-/// /// Do something with the foo_bar parameter. See also that::other::module::foo.
+/// /// Do something with the foo_bar parameter. See also
+/// that::other::module::foo.
  /// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
  /// fn doit(foo_bar) { .. }
  /// ```
  declare_lint! {
-    pub DOC_MARKDOWN, Warn,
-    "checks for the presence of `_`, `::` or camel-case outside ticks in documentation"
+    pub DOC_MARKDOWN,
+    Warn,
+    "presence of `_`, `::` or camel-case outside backticks in documentation"
  }
  
  #[derive(Clone)]
@@ -50,236 +57,217 @@ fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) {
      }
  }
  
-pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
-    let mut docs = vec![];
-
-    let mut in_multiline = false;
-    for attr in attrs {
-        if attr.node.is_sugared_doc {
-            if let ast::MetaItemKind::NameValue(_, ref doc) = attr.node.value.node {
-                if let ast::LitKind::Str(ref doc, _) = doc.node {
-                    // doc comments start with `///` or `//!`
-                    let real_doc = &doc[3..];
-                    let mut span = attr.span;
-                    span.lo = span.lo + BytePos(3);
-
-                    // check for multiline code blocks
-                    if real_doc.trim_left().starts_with("```") {
-                        in_multiline = !in_multiline;
-                    } else if !in_multiline {
-                        docs.push((real_doc, span));
-                    }
-                }
-            }
-        }
-    }
+struct Parser<'a> {
+    parser: pulldown_cmark::Parser<'a>,
+}
  
-    if !docs.is_empty() {
-        let _ = check_doc(cx, valid_idents, &docs);
+impl<'a> Parser<'a> {
+    fn new(parser: pulldown_cmark::Parser<'a>) -> Parser<'a> {
+        Self { parser: parser }
      }
  }
  
-#[allow(while_let_loop)] // #362
-pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(&str, Span)]) -> Result<(), ()> {
-    // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
-    // There really is no markdown specification that would disambiguate this properly. This is
-    // what GitHub and Rustdoc do:
-    //
-    // foo_bar test_quz    → foo_bar test_quz
-    // foo_bar_baz         → foo_bar_baz (note that the “official” spec says this should be emphasized)
-    // _foo bar_ test_quz_ → <em>foo bar</em> test_quz_
-    // \_foo bar\_         → _foo bar_
-    // (_baz_)             → (<em>baz</em>)
-    // foo _ bar _ baz     → foo _ bar _ baz
-
-    /// Character that can appear in a path
-    fn is_path_char(c: char) -> bool {
-        match c {
-            t if t.is_alphanumeric() => true,
-            ':' | '_' => true,
-            _ => false,
-        }
-    }
+impl<'a> Iterator for Parser<'a> {
+    type Item = (usize, pulldown_cmark::Event<'a>);
  
-    #[derive(Clone, Debug)]
-    /// This type is used to iterate through the documentation characters, keeping the span at the
-    /// same time.
-    struct Parser<'a> {
-        /// First byte of the current potential match
-        current_word_begin: usize,
-        /// List of lines and their associated span
-        docs: &'a[(&'a str, Span)],
-        /// Index of the current line we are parsing
-        line: usize,
-        /// Whether we are in a link
-        link: bool,
-        /// Whether we are at the beginning of a line
-        new_line: bool,
-        /// Whether we were to the end of a line last time `next` was called
-        reset: bool,
-        /// The position of the current character within the current line
-        pos: usize,
+    fn next(&mut self) -> Option<Self::Item> {
+        let offset = self.parser.get_offset();
+        self.parser.next().map(|event| (offset, event))
      }
+}
  
-    impl<'a> Parser<'a> {
-        fn advance_begin(&mut self) {
-            self.current_word_begin = self.pos;
+/// Cleanup documentation decoration (`///` and such).
+///
+/// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
+/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we
+/// need to keep track of
+/// the spans but this function is inspired from the later.
+#[allow(cast_possible_truncation)]
+pub fn strip_doc_comment_decoration(comment: &str, span: Span) -> (String, Vec<(usize, Span)>) {
+    // one-line comments lose their prefix
+    const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
+    for prefix in ONELINERS {
+        if comment.starts_with(*prefix) {
+            let doc = &comment[prefix.len()..];
+            let mut doc = doc.to_owned();
+            doc.push('\n');
+            return (
+                doc.to_owned(),
+                vec![
+                    (
+                        doc.len(),
+                        Span {
+                            lo: span.lo + BytePos(prefix.len() as u32),
+                            ..span
+                        }
+                    ),
+                ],
+            );
          }
+    }
  
-        fn line(&self) -> (&'a str, Span) {
-            self.docs[self.line]
+    if comment.starts_with("/*") {
+        let doc = &comment[3..comment.len() - 2];
+        let mut sizes = vec![];
+        let mut contains_initial_stars = false;
+        for line in doc.lines() {
+            let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
+            debug_assert_eq!(offset as u32 as usize, offset);
+            contains_initial_stars |= line.trim_left().starts_with('*');
+            // +1 for the newline
+            sizes.push((
+                line.len() + 1,
+                Span {
+                    lo: span.lo + BytePos(offset as u32),
+                    ..span
+                },
+            ));
          }
-
-        fn peek(&self) -> Option<char> {
-            self.line().0[self.pos..].chars().next()
+        if !contains_initial_stars {
+            return (doc.to_string(), sizes);
          }
-
-        #[allow(while_let_on_iterator)] // borrowck complains about for
-        fn jump_to(&mut self, n: char) -> Result<(), ()> {
-            while let Some((_, c)) = self.next() {
-                if c == n {
-                    self.advance_begin();
-                    return Ok(());
+        // remove the initial '*'s if any
+        let mut no_stars = String::with_capacity(doc.len());
+        for line in doc.lines() {
+            let mut chars = line.chars();
+            while let Some(c) = chars.next() {
+                if c.is_whitespace() {
+                    no_stars.push(c);
+                } else {
+                    no_stars.push(if c == '*' { ' ' } else { c });
+                    break;
                  }
              }
-
-            Err(())
-        }
-
-        fn next_line(&mut self) {
-            self.pos = 0;
-            self.current_word_begin = 0;
-            self.line += 1;
-            self.new_line = true;
+            no_stars.push_str(chars.as_str());
+            no_stars.push('\n');
          }
+        return (no_stars, sizes);
+    }
  
-        fn put_back(&mut self, c: char) {
-            self.pos -= c.len_utf8();
-        }
-
-        #[allow(cast_possible_truncation)]
-        fn word(&self) -> (&'a str, Span) {
-            let begin = self.current_word_begin;
-            let end = self.pos;
-
-            debug_assert_eq!(end as u32 as usize, end);
-            debug_assert_eq!(begin as u32 as usize, begin);
+    panic!("not a doc-comment: {}", comment);
+}
  
-            let (doc, mut span) = self.line();
-            span.hi = span.lo + BytePos(end as u32);
-            span.lo = span.lo + BytePos(begin as u32);
+pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
+    let mut doc = String::new();
+    let mut spans = vec![];
  
-            (&doc[begin..end], span)
+    for attr in attrs {
+        if attr.is_sugared_doc {
+            if let Some(ref current) = attr.value_str() {
+                let current = current.to_string();
+                let (current, current_spans) = strip_doc_comment_decoration(&current, attr.span);
+                spans.extend_from_slice(&current_spans);
+                doc.push_str(&current);
+            }
+        } else if let Some(name) = attr.name() {
+            // ignore mix of sugared and non-sugared doc
+            if name == "doc" {
+                return;
+            }
          }
      }
  
-    impl<'a> Iterator for Parser<'a> {
-        type Item = (bool, char);
-
-        fn next(&mut self) -> Option<(bool, char)> {
-            while self.line < self.docs.len() {
-                if self.reset {
-                    self.line += 1;
-                    self.reset = false;
-                    self.pos = 0;
-                    self.current_word_begin = 0;
-                }
-
-                let mut chars = self.line().0[self.pos..].chars();
-                let c = chars.next();
+    let mut current = 0;
+    for &mut (ref mut offset, _) in &mut spans {
+        let offset_copy = *offset;
+        *offset = current;
+        current += offset_copy;
+    }
  
-                if let Some(c) = c {
-                    self.pos += c.len_utf8();
-                    let new_line = self.new_line;
-                    self.new_line = c == '\n' || (self.new_line && c.is_whitespace());
-                    return Some((new_line, c));
-                } else if self.line == self.docs.len() - 1 {
-                    return None;
-                } else {
-                    self.new_line = true;
-                    self.reset = true;
-                    self.pos += 1;
-                    return Some((true, '\n'));
-                }
+    if !doc.is_empty() {
+        let parser = Parser::new(pulldown_cmark::Parser::new(&doc));
+        let parser = parser.coalesce(|x, y| {
+            use pulldown_cmark::Event::*;
+
+            let x_offset = x.0;
+            let y_offset = y.0;
+
+            match (x.1, y.1) {
+                (Text(x), Text(y)) => {
+                    let mut x = x.into_owned();
+                    x.push_str(&y);
+                    Ok((x_offset, Text(x.into())))
+                },
+                (x, y) => Err(((x_offset, x), (y_offset, y))),
              }
+        });
+        check_doc(cx, valid_idents, parser, &spans);
+    }
+}
  
-            None
+fn check_doc<'a, Events: Iterator<Item = (usize, pulldown_cmark::Event<'a>)>>(
+    cx: &EarlyContext,
+    valid_idents: &[String],
+    docs: Events,
+    spans: &[(usize, Span)],
+) {
+    use pulldown_cmark::Event::*;
+    use pulldown_cmark::Tag::*;
+
+    let mut in_code = false;
+
+    for (offset, event) in docs {
+        match event {
+            Start(CodeBlock(_)) |
+            Start(Code) => in_code = true,
+            End(CodeBlock(_)) |
+            End(Code) => in_code = false,
+            Start(_tag) | End(_tag) => (), // We don't care about other tags
+            Html(_html) |
+            InlineHtml(_html) => (), // HTML is weird, just ignore it
+            SoftBreak => (),
+            HardBreak => (),
+            FootnoteReference(text) |
+            Text(text) => {
+                if !in_code {
+                    let index = match spans.binary_search_by(|c| c.0.cmp(&offset)) {
+                        Ok(o) => o,
+                        Err(e) => e - 1,
+                    };
+
+                    let (begin, span) = spans[index];
+
+                    // Adjust for the begining of the current `Event`
+                    let span = Span {
+                        lo: span.lo + BytePos::from_usize(offset - begin),
+                        ..span
+                    };
+
+                    check_text(cx, valid_idents, &text, span);
+                }
+            },
          }
      }
+}
  
-    let mut parser = Parser {
-        current_word_begin: 0,
-        docs: docs,
-        line: 0,
-        link: false,
-        new_line: true,
-        reset: false,
-        pos: 0,
-    };
-
-    loop {
-        match parser.next() {
-            Some((new_line, c)) => {
-                match c {
-                    '#' if new_line => { // don’t warn on titles
-                        parser.next_line();
-                    }
-                    '`' => {
-                        try!(parser.jump_to('`'));
-                    }
-                    '[' => {
-                        // Check for a reference definition `[foo]:` at the beginning of a line
-                        let mut link = true;
-
-                        if new_line {
-                            let mut lookup_parser = parser.clone();
-                            if let Some(_) = lookup_parser.find(|&(_, c)| c == ']') {
-                                if let Some((_, ':')) = lookup_parser.next() {
-                                    lookup_parser.next_line();
-                                    parser = lookup_parser;
-                                    link = false;
-                                }
-                            }
-                        }
+fn check_text(cx: &EarlyContext, valid_idents: &[String], text: &str, span: Span) {
+    for word in text.split_whitespace() {
+        // Trim punctuation as in `some comment (see foo::bar).`
+        //                                                   ^^
+        // Or even as in `_foo bar_` which is emphasized.
+        let word = word.trim_matches(|c: char| !c.is_alphanumeric());
  
-                        parser.advance_begin();
-                        parser.link = link;
-                    }
-                    ']' if parser.link => {
-                        parser.link = false;
-
-                        match parser.peek() {
-                            Some('(') => try!(parser.jump_to(')')),
-                            Some('[') => try!(parser.jump_to(']')),
-                            Some(_) => continue,
-                            None => return Err(()),
-                        }
-                    }
-                    c if !is_path_char(c) => {
-                        parser.advance_begin();
-                    }
-                    _ => {
-                        if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) {
-                            parser.put_back(c);
-                        }
+        if valid_idents.iter().any(|i| i == word) {
+            continue;
+        }
  
-                        let (word, span) = parser.word();
-                        check_word(cx, valid_idents, word, span);
-                        parser.advance_begin();
-                    }
-                }
+        // Adjust for the current word
+        let offset = word.as_ptr() as usize - text.as_ptr() as usize;
+        let span = Span {
+            lo: span.lo + BytePos::from_usize(offset),
+            hi: span.lo + BytePos::from_usize(offset + word.len()),
+            ..span
+        };
  
-            }
-            None => break,
-        }
+        check_word(cx, word, span);
      }
-
-    Ok(())
  }
  
-fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
-    /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is
-    /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
+fn check_word(cx: &EarlyContext, word: &str, span: Span) {
+    /// Checks if a string is camel-case, ie. contains at least two uppercase
+    /// letter (`Clippy` is
+    /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded
+    /// (`IDs` is ok).
      fn is_camel_case(s: &str) -> bool {
          if s.starts_with(|c: char| c.is_digit(10)) {
              return false;
@@ -291,28 +279,20 @@ fn is_camel_case(s: &str) -> bool {
              s
          };
  
-        s.chars().all(char::is_alphanumeric) &&
-        s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
-        s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
+        s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
+            s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
      }
  
      fn has_underscore(s: &str) -> bool {
          s != "_" && !s.contains("\\_") && s.contains('_')
      }
  
-    // Trim punctuation as in `some comment (see foo::bar).`
-    //                                                   ^^
-    // Or even as in `_foo bar_` which is emphasized.
-    let word = word.trim_matches(|c: char| !c.is_alphanumeric());
-
-    if valid_idents.iter().any(|i| i == word) {
-        return;
-    }
-
      if has_underscore(word) || word.contains("::") || is_camel_case(word) {
-        span_lint(cx,
-                  DOC_MARKDOWN,
-                  span,
-                  &format!("you should put `{}` between ticks in the documentation", word));
+        span_lint(
+            cx,
+            DOC_MARKDOWN,
+            span,
+            &format!("you should put `{}` between ticks in the documentation", word),
+        );
      }
  }