+use itertools::Itertools;
+use pulldown_cmark;
use rustc::lint::*;
use syntax::ast;
use syntax::codemap::{Span, BytePos};
+use syntax_pos::Pos;
use utils::span_lint;
/// **What it does:** Checks for the presence of `_`, `::` or camel-case words
///
/// **Examples:**
/// ```rust
-/// /// Do something with the foo_bar parameter. See also that::other::module::foo.
+/// /// Do something with the foo_bar parameter. See also
+/// that::other::module::foo.
/// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
/// fn doit(foo_bar) { .. }
/// ```
impl Doc {
pub fn new(valid_idents: Vec<String>) -> Self {
- Doc { valid_idents: valid_idents }
+ Self { valid_idents: valid_idents }
}
}
}
}
+struct Parser<'a> {
+ parser: pulldown_cmark::Parser<'a>,
+}
+
+impl<'a> Parser<'a> {
+ fn new(parser: pulldown_cmark::Parser<'a>) -> Self {
+ Self { parser: parser }
+ }
+}
+
+impl<'a> Iterator for Parser<'a> {
+ type Item = (usize, pulldown_cmark::Event<'a>);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let offset = self.parser.get_offset();
+ self.parser.next().map(|event| (offset, event))
+ }
+}
+
/// Cleanup documentation decoration (`///` and such).
///
/// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
-/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we need to keep track of
-/// the span but this function is inspired from the later.
+/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we
+/// need to keep track of
+/// the spans, but this function is inspired by the latter.
#[allow(cast_possible_truncation)]
-pub fn strip_doc_comment_decoration((comment, span): (String, Span)) -> Vec<(String, Span)> {
+pub fn strip_doc_comment_decoration(comment: &str, span: Span) -> (String, Vec<(usize, Span)>) {
// one-line comments lose their prefix
const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
for prefix in ONELINERS {
if comment.starts_with(*prefix) {
- return vec![(comment[prefix.len()..].to_owned(),
- Span { lo: span.lo + BytePos(prefix.len() as u32), ..span })];
+ let doc = &comment[prefix.len()..];
+ let mut doc = doc.to_owned();
+ doc.push('\n');
+ return (
+ doc.to_owned(),
+ vec![
+ (doc.len(), span.with_lo(span.lo() + BytePos(prefix.len() as u32))),
+ ],
+ );
}
}
if comment.starts_with("/*") {
- return comment[3..comment.len() - 2]
- .lines()
- .map(|line| {
- let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
- debug_assert_eq!(offset as u32 as usize, offset);
-
- (line.to_owned(), Span { lo: span.lo + BytePos(offset as u32), ..span })
- })
- .collect();
+ let doc = &comment[3..comment.len() - 2];
+ let mut sizes = vec![];
+ let mut contains_initial_stars = false;
+ for line in doc.lines() {
+ let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
+ debug_assert_eq!(offset as u32 as usize, offset);
+ contains_initial_stars |= line.trim_left().starts_with('*');
+ // +1 for the newline
+ sizes.push((line.len() + 1, span.with_lo(span.lo() + BytePos(offset as u32))));
+ }
+ if !contains_initial_stars {
+ return (doc.to_string(), sizes);
+ }
+ // remove the initial '*'s if any
+ let mut no_stars = String::with_capacity(doc.len());
+ for line in doc.lines() {
+ let mut chars = line.chars();
+ while let Some(c) = chars.next() {
+ if c.is_whitespace() {
+ no_stars.push(c);
+ } else {
+ no_stars.push(if c == '*' { ' ' } else { c });
+ break;
+ }
+ }
+ no_stars.push_str(chars.as_str());
+ no_stars.push('\n');
+ }
+ return (no_stars, sizes);
}
panic!("not a doc-comment: {}", comment);
}
pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
- let mut docs = vec![];
+ let mut doc = String::new();
+ let mut spans = vec![];
for attr in attrs {
if attr.is_sugared_doc {
- if let ast::MetaItemKind::NameValue(ref doc) = attr.value.node {
- if let ast::LitKind::Str(ref doc, _) = doc.node {
- let doc = (*doc.as_str()).to_owned();
- docs.extend_from_slice(&strip_doc_comment_decoration((doc, attr.span)));
- }
+ if let Some(ref current) = attr.value_str() {
+ let current = current.to_string();
+ let (current, current_spans) = strip_doc_comment_decoration(¤t, attr.span);
+ spans.extend_from_slice(¤t_spans);
+ doc.push_str(¤t);
+ }
+ } else if let Some(name) = attr.name() {
+ // ignore mix of sugared and non-sugared doc
+ if name == "doc" {
+ return;
}
}
}
- if !docs.is_empty() {
- let _ = check_doc(cx, valid_idents, &docs);
- }
-}
-
-#[allow(while_let_loop)] // #362
-fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(String, Span)]) -> Result<(), ()> {
- // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
- // There really is no markdown specification that would disambiguate this properly. This is
- // what GitHub and Rustdoc do:
- //
- // foo_bar test_quz → foo_bar test_quz
- // foo_bar_baz → foo_bar_baz (note that the “official” spec says this should be emphasized)
- // _foo bar_ test_quz_ → <em>foo bar</em> test_quz_
- // \_foo bar\_ → _foo bar_
- // (_baz_) → (<em>baz</em>)
- // foo _ bar _ baz → foo _ bar _ baz
-
- /// Character that can appear in a path
- fn is_path_char(c: char) -> bool {
- match c {
- t if t.is_alphanumeric() => true,
- ':' | '_' => true,
- _ => false,
- }
- }
-
- #[derive(Clone, Debug)]
- /// This type is used to iterate through the documentation characters, keeping the span at the
- /// same time.
- struct Parser<'a> {
- /// First byte of the current potential match
- current_word_begin: usize,
- /// List of lines and their associated span
- docs: &'a [(String, Span)],
- /// Index of the current line we are parsing
- line: usize,
- /// Whether we are in a link
- link: bool,
- /// Whether we are at the beginning of a line
- new_line: bool,
- /// Whether we were to the end of a line last time `next` was called
- reset: bool,
- /// The position of the current character within the current line
- pos: usize,
+ let mut current = 0;
+ for &mut (ref mut offset, _) in &mut spans {
+ let offset_copy = *offset;
+ *offset = current;
+ current += offset_copy;
}
- impl<'a> Parser<'a> {
- fn advance_begin(&mut self) {
- self.current_word_begin = self.pos;
- }
-
- fn line(&self) -> (&'a str, Span) {
- let (ref doc, span) = self.docs[self.line];
- (doc, span)
- }
-
- fn peek(&self) -> Option<char> {
- self.line().0[self.pos..].chars().next()
- }
-
- #[allow(while_let_on_iterator)] // borrowck complains about for
- fn jump_to(&mut self, n: char) -> Result<bool, ()> {
- while let Some((new_line, c)) = self.next() {
- if c == n {
- self.advance_begin();
- return Ok(new_line);
- }
+ if !doc.is_empty() {
+ let parser = Parser::new(pulldown_cmark::Parser::new(&doc));
+ let parser = parser.coalesce(|x, y| {
+ use pulldown_cmark::Event::*;
+
+ let x_offset = x.0;
+ let y_offset = y.0;
+
+ match (x.1, y.1) {
+ (Text(x), Text(y)) => {
+ let mut x = x.into_owned();
+ x.push_str(&y);
+ Ok((x_offset, Text(x.into())))
+ },
+ (x, y) => Err(((x_offset, x), (y_offset, y))),
}
-
- Err(())
- }
-
- fn next_line(&mut self) {
- self.pos = 0;
- self.current_word_begin = 0;
- self.line += 1;
- self.new_line = true;
- }
-
- fn put_back(&mut self, c: char) {
- self.pos -= c.len_utf8();
- }
-
- #[allow(cast_possible_truncation)]
- fn word(&self) -> (&'a str, Span) {
- let begin = self.current_word_begin;
- let end = self.pos;
-
- debug_assert_eq!(end as u32 as usize, end);
- debug_assert_eq!(begin as u32 as usize, begin);
-
- let (doc, mut span) = self.line();
- span.hi = span.lo + BytePos(end as u32);
- span.lo = span.lo + BytePos(begin as u32);
-
- (&doc[begin..end], span)
- }
+ });
+ check_doc(cx, valid_idents, parser, &spans);
}
+}
- impl<'a> Iterator for Parser<'a> {
- type Item = (bool, char);
-
- fn next(&mut self) -> Option<(bool, char)> {
- while self.line < self.docs.len() {
- if self.reset {
- self.line += 1;
- self.reset = false;
- self.pos = 0;
- self.current_word_begin = 0;
- }
-
- let mut chars = self.line().0[self.pos..].chars();
- let c = chars.next();
-
- if let Some(c) = c {
- self.pos += c.len_utf8();
- let new_line = self.new_line;
- self.new_line = c == '\n' || (self.new_line && c.is_whitespace());
- return Some((new_line, c));
- } else if self.line == self.docs.len() - 1 {
- return None;
- } else {
- self.new_line = true;
- self.reset = true;
- self.pos += 1;
- return Some((true, '\n'));
+fn check_doc<'a, Events: Iterator<Item = (usize, pulldown_cmark::Event<'a>)>>(
+ cx: &EarlyContext,
+ valid_idents: &[String],
+ docs: Events,
+ spans: &[(usize, Span)],
+) {
+ use pulldown_cmark::Event::*;
+ use pulldown_cmark::Tag::*;
+
+ let mut in_code = false;
+
+ for (offset, event) in docs {
+ match event {
+ Start(CodeBlock(_)) |
+ Start(Code) => in_code = true,
+ End(CodeBlock(_)) |
+ End(Code) => in_code = false,
+ Start(_tag) | End(_tag) => (), // We don't care about other tags
+ Html(_html) |
+ InlineHtml(_html) => (), // HTML is weird, just ignore it
+ SoftBreak => (),
+ HardBreak => (),
+ FootnoteReference(text) |
+ Text(text) => {
+ if !in_code {
+ let index = match spans.binary_search_by(|c| c.0.cmp(&offset)) {
+ Ok(o) => o,
+ Err(e) => e - 1,
+ };
+
+ let (begin, span) = spans[index];
+
+                    // Adjust for the beginning of the current `Event`
+ let span = span.with_lo(span.lo() + BytePos::from_usize(offset - begin));
+
+ check_text(cx, valid_idents, &text, span);
}
- }
-
- None
+ },
}
}
+}
- let mut parser = Parser {
- current_word_begin: 0,
- docs: docs,
- line: 0,
- link: false,
- new_line: true,
- reset: false,
- pos: 0,
- };
-
- /// Check for fanced code block.
- macro_rules! check_block {
- ($parser:expr, $c:tt, $new_line:expr) => {{
- check_block!($parser, $c, $c, $new_line)
- }};
-
- ($parser:expr, $c:pat, $c_expr:expr, $new_line:expr) => {{
- fn check_block(parser: &mut Parser, new_line: bool) -> Result<bool, ()> {
- if new_line {
- let mut lookup_parser = parser.clone();
- if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
- *parser = lookup_parser;
- // 3 or more ` or ~ open a code block to be closed with the same number of ` or ~
- let mut open_count = 3;
- while let Some((false, $c)) = parser.next() {
- open_count += 1;
- }
-
- loop {
- loop {
- if try!(parser.jump_to($c_expr)) {
- break;
- }
- }
-
- lookup_parser = parser.clone();
- let a = lookup_parser.next();
- let b = lookup_parser.next();
- if let (Some((false, $c)), Some((false, $c))) = (a, b) {
- let mut close_count = 3;
- while let Some((false, $c)) = lookup_parser.next() {
- close_count += 1;
- }
-
- if close_count == open_count {
- *parser = lookup_parser;
- return Ok(true);
- }
- }
- }
- }
- }
+fn check_text(cx: &EarlyContext, valid_idents: &[String], text: &str, span: Span) {
+ for word in text.split_whitespace() {
+ // Trim punctuation as in `some comment (see foo::bar).`
+ // ^^
+ // Or even as in `_foo bar_` which is emphasized.
+ let word = word.trim_matches(|c: char| !c.is_alphanumeric());
- Ok(false)
- }
+ if valid_idents.iter().any(|i| i == word) {
+ continue;
+ }
- check_block(&mut $parser, $new_line)
- }};
- }
+ // Adjust for the current word
+ let offset = word.as_ptr() as usize - text.as_ptr() as usize;
+ let span = Span::new(
+ span.lo() + BytePos::from_usize(offset),
+ span.lo() + BytePos::from_usize(offset + word.len()),
+ span.ctxt(),
+ );
- loop {
- match parser.next() {
- Some((new_line, c)) => {
- match c {
- '#' if new_line => {
- // don’t warn on titles
- parser.next_line();
- },
- '`' => {
- if try!(check_block!(parser, '`', new_line)) {
- continue;
- }
-
- try!(parser.jump_to('`')); // not a code block, just inline code
- },
- '~' => {
- if try!(check_block!(parser, '~', new_line)) {
- continue;
- }
-
- // ~ does not introduce inline code, but two of them introduce
- // strikethrough. Too bad for the consistency but we don't care about
- // strikethrough.
- },
- '[' => {
- // Check for a reference definition `[foo]:` at the beginning of a line
- let mut link = true;
-
- if new_line {
- let mut lookup_parser = parser.clone();
- if lookup_parser.any(|(_, c)| c == ']') {
- if let Some((_, ':')) = lookup_parser.next() {
- lookup_parser.next_line();
- parser = lookup_parser;
- link = false;
- }
- }
- }
-
- parser.advance_begin();
- parser.link = link;
- },
- ']' if parser.link => {
- parser.link = false;
-
- match parser.peek() {
- Some('(') => {
- try!(parser.jump_to(')'));
- },
- Some('[') => {
- try!(parser.jump_to(']'));
- },
- Some(_) => continue,
- None => return Err(()),
- }
- },
- c if !is_path_char(c) => {
- parser.advance_begin();
- },
- _ => {
- if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) {
- parser.put_back(c);
- }
-
- let (word, span) = parser.word();
- check_word(cx, valid_idents, word, span);
- parser.advance_begin();
- },
- }
-
- },
- None => break,
- }
+ check_word(cx, word, span);
}
-
- Ok(())
}
-fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
- /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is
- /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
+fn check_word(cx: &EarlyContext, word: &str, span: Span) {
+    /// Checks if a string is camel-case, i.e. contains at least two uppercase
+    /// letters (`Clippy` is
+    /// ok) and one lower-case letter (`NASA` is ok). Plurals are also excluded
+    /// (`IDs` is ok).
fn is_camel_case(s: &str) -> bool {
if s.starts_with(|c: char| c.is_digit(10)) {
return false;
};
s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
- s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
+ s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
}
fn has_underscore(s: &str) -> bool {
s != "_" && !s.contains("\\_") && s.contains('_')
}
- // Trim punctuation as in `some comment (see foo::bar).`
- // ^^
- // Or even as in `_foo bar_` which is emphasized.
- let word = word.trim_matches(|c: char| !c.is_alphanumeric());
-
- if valid_idents.iter().any(|i| i == word) {
- return;
- }
-
if has_underscore(word) || word.contains("::") || is_camel_case(word) {
- span_lint(cx,
- DOC_MARKDOWN,
- span,
- &format!("you should put `{}` between ticks in the documentation", word));
+ span_lint(
+ cx,
+ DOC_MARKDOWN,
+ span,
+ &format!("you should put `{}` between ticks in the documentation", word),
+ );
}
}