clippy_lints/src/doc.rs

   1 use rustc::lint::*;
   2 use syntax::ast;
   3 use syntax::codemap::{Span, BytePos};
   4 use utils::span_lint;
   5
/// **What it does:** This lint checks for the presence of `_`, `::` or camel-case words outside
/// ticks in documentation.
///
/// **Why is this bad?** *Rustdoc* supports markdown formatting; `_`, `::` and camel-case probably
/// indicate some code which should be included between ticks. `_` can also be used for emphasis
/// in markdown, and this lint tries to take that into account.
///
/// **Known problems:** Lots of bad docs won’t be fixed; what the lint checks for is limited.
///
/// **Examples:**
/// ```rust
/// /// Do something with the foo_bar parameter. See also that::other::module::foo.
/// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
/// fn doit(foo_bar) { .. }
/// ```
declare_lint! {
    pub DOC_MARKDOWN, Warn,
    "checks for the presence of `_`, `::` or camel-case outside ticks in documentation"
}
  25
/// Lint pass that checks documentation comments for code-like words missing their ticks.
#[derive(Clone)]
pub struct Doc {
    /// Words that are never reported, even when they look like code.
    valid_idents: Vec<String>,
}
  30
impl Doc {
    /// Creates the pass, storing `valid_idents` as the list of words that must not be reported.
    pub fn new(valid_idents: Vec<String>) -> Self {
        Doc { valid_idents: valid_idents }
    }
}
  36
  37 impl LintPass for Doc {
  38     fn get_lints(&self) -> LintArray {
  39         lint_array![DOC_MARKDOWN]
  40     }
  41 }
  42
  43 impl EarlyLintPass for Doc {
  44     fn check_crate(&mut self, cx: &EarlyContext, krate: &ast::Crate) {
  45         check_attrs(cx, &self.valid_idents, &krate.attrs);
  46     }
  47
  48     fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) {
  49         check_attrs(cx, &self.valid_idents, &item.attrs);
  50     }
  51 }
  52
  53 pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
  54     let mut docs = vec![];
  55
  56     let mut in_multiline = false;
  57     for attr in attrs {
  58         if attr.node.is_sugared_doc {
  59             if let ast::MetaItemKind::NameValue(_, ref doc) = attr.node.value.node {
  60                 if let ast::LitKind::Str(ref doc, _) = doc.node {
  61                     // doc comments start with `///` or `//!`
  62                     let real_doc = &doc[3..];
  63                     let mut span = attr.span;
  64                     span.lo = span.lo + BytePos(3);
  65
  66                     // check for multiline code blocks
  67                     if real_doc.trim_left().starts_with("```") {
  68                         in_multiline = !in_multiline;
  69                     } else if !in_multiline {
  70                         docs.push((real_doc, span));
  71                     }
  72                 }
  73             }
  74         }
  75     }
  76
  77     if !docs.is_empty() {
  78         let _ = check_doc(cx, valid_idents, &docs);
  79     }
  80 }
  81
  82 #[allow(while_let_loop)] // #362
  83 pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(&str, Span)]) -> Result<(), ()> {
  84     // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
  85     // There really is no markdown specification that would disambiguate this properly. This is
  86     // what GitHub and Rustdoc do:
  87     //
  88     // foo_bar test_quz    → foo_bar test_quz
  89     // foo_bar_baz         → foo_bar_baz (note that the “official” spec says this should be emphasized)
  90     // _foo bar_ test_quz_ → <em>foo bar</em> test_quz_
  91     // \_foo bar\_         → _foo bar_
  92     // (_baz_)             → (<em>baz</em>)
  93     // foo _ bar _ baz     → foo _ bar _ baz
  94
  95     /// Character that can appear in a path
  96     fn is_path_char(c: char) -> bool {
  97         match c {
  98             t if t.is_alphanumeric() => true,
  99             ':' | '_' => true,
 100             _ => false,
 101         }
 102     }
 103
 104     #[derive(Clone, Debug)]
 105     /// This type is used to iterate through the documentation characters, keeping the span at the
 106     /// same time.
 107     struct Parser<'a> {
 108         /// First byte of the current potential match
 109         current_word_begin: usize,
 110         /// List of lines and their associated span
 111         docs: &'a [(&'a str, Span)],
 112         /// Index of the current line we are parsing
 113         line: usize,
 114         /// Whether we are in a link
 115         link: bool,
 116         /// Whether we are at the beginning of a line
 117         new_line: bool,
 118         /// Whether we were to the end of a line last time `next` was called
 119         reset: bool,
 120         /// The position of the current character within the current line
 121         pos: usize,
 122     }
 123
 124     impl<'a> Parser<'a> {
 125         fn advance_begin(&mut self) {
 126             self.current_word_begin = self.pos;
 127         }
 128
 129         fn line(&self) -> (&'a str, Span) {
 130             self.docs[self.line]
 131         }
 132
 133         fn peek(&self) -> Option<char> {
 134             self.line().0[self.pos..].chars().next()
 135         }
 136
 137         #[allow(while_let_on_iterator)] // borrowck complains about for
 138         fn jump_to(&mut self, n: char) -> Result<(), ()> {
 139             while let Some((_, c)) = self.next() {
 140                 if c == n {
 141                     self.advance_begin();
 142                     return Ok(());
 143                 }
 144             }
 145
 146             Err(())
 147         }
 148
 149         fn next_line(&mut self) {
 150             self.pos = 0;
 151             self.current_word_begin = 0;
 152             self.line += 1;
 153             self.new_line = true;
 154         }
 155
 156         fn put_back(&mut self, c: char) {
 157             self.pos -= c.len_utf8();
 158         }
 159
 160         #[allow(cast_possible_truncation)]
 161         fn word(&self) -> (&'a str, Span) {
 162             let begin = self.current_word_begin;
 163             let end = self.pos;
 164
 165             debug_assert_eq!(end as u32 as usize, end);
 166             debug_assert_eq!(begin as u32 as usize, begin);
 167
 168             let (doc, mut span) = self.line();
 169             span.hi = span.lo + BytePos(end as u32);
 170             span.lo = span.lo + BytePos(begin as u32);
 171
 172             (&doc[begin..end], span)
 173         }
 174     }
 175
 176     impl<'a> Iterator for Parser<'a> {
 177         type Item = (bool, char);
 178
 179         fn next(&mut self) -> Option<(bool, char)> {
 180             while self.line < self.docs.len() {
 181                 if self.reset {
 182                     self.line += 1;
 183                     self.reset = false;
 184                     self.pos = 0;
 185                     self.current_word_begin = 0;
 186                 }
 187
 188                 let mut chars = self.line().0[self.pos..].chars();
 189                 let c = chars.next();
 190
 191                 if let Some(c) = c {
 192                     self.pos += c.len_utf8();
 193                     let new_line = self.new_line;
 194                     self.new_line = c == '\n' || (self.new_line && c.is_whitespace());
 195                     return Some((new_line, c));
 196                 } else if self.line == self.docs.len() - 1 {
 197                     return None;
 198                 } else {
 199                     self.new_line = true;
 200                     self.reset = true;
 201                     self.pos += 1;
 202                     return Some((true, '\n'));
 203                 }
 204             }
 205
 206             None
 207         }
 208     }
 209
 210     let mut parser = Parser {
 211         current_word_begin: 0,
 212         docs: docs,
 213         line: 0,
 214         link: false,
 215         new_line: true,
 216         reset: false,
 217         pos: 0,
 218     };
 219
 220     loop {
 221         match parser.next() {
 222             Some((new_line, c)) => {
 223                 match c {
 224                     '#' if new_line => { // don’t warn on titles
 225                         parser.next_line();
 226                     }
 227                     '`' => {
 228                         try!(parser.jump_to('`'));
 229                     }
 230                     '[' => {
 231                         // Check for a reference definition `[foo]:` at the beginning of a line
 232                         let mut link = true;
 233
 234                         if new_line {
 235                             let mut lookup_parser = parser.clone();
 236                             if let Some(_) = lookup_parser.find(|&(_, c)| c == ']') {
 237                                 if let Some((_, ':')) = lookup_parser.next() {
 238                                     lookup_parser.next_line();
 239                                     parser = lookup_parser;
 240                                     link = false;
 241                                 }
 242                             }
 243                         }
 244
 245                         parser.advance_begin();
 246                         parser.link = link;
 247                     }
 248                     ']' if parser.link => {
 249                         parser.link = false;
 250
 251                         match parser.peek() {
 252                             Some('(') => try!(parser.jump_to(')')),
 253                             Some('[') => try!(parser.jump_to(']')),
 254                             Some(_) => continue,
 255                             None => return Err(()),
 256                         }
 257                     }
 258                     c if !is_path_char(c) => {
 259                         parser.advance_begin();
 260                     }
 261                     _ => {
 262                         if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) {
 263                             parser.put_back(c);
 264                         }
 265
 266                         let (word, span) = parser.word();
 267                         check_word(cx, valid_idents, word, span);
 268                         parser.advance_begin();
 269                     }
 270                 }
 271
 272             }
 273             None => break,
 274         }
 275     }
 276
 277     Ok(())
 278 }
 279
 280 fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
 281     /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is
 282     /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
 283     fn is_camel_case(s: &str) -> bool {
 284         if s.starts_with(|c: char| c.is_digit(10)) {
 285             return false;
 286         }
 287
 288         let s = if s.ends_with('s') {
 289             &s[..s.len() - 1]
 290         } else {
 291             s
 292         };
 293
 294         s.chars().all(char::is_alphanumeric) &&
 295         s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
 296         s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
 297     }
 298
 299     fn has_underscore(s: &str) -> bool {
 300         s != "_" && !s.contains("\\_") && s.contains('_')
 301     }
 302
 303     // Trim punctuation as in `some comment (see foo::bar).`
 304     //                                                   ^^
 305     // Or even as in `_foo bar_` which is emphasized.
 306     let word = word.trim_matches(|c: char| !c.is_alphanumeric());
 307
 308     if valid_idents.iter().any(|i| i == word) {
 309         return;
 310     }
 311
 312     if has_underscore(word) || word.contains("::") || is_camel_case(word) {
 313         span_lint(cx,
 314                   DOC_MARKDOWN,
 315                   span,
 316                   &format!("you should put `{}` between ticks in the documentation", word));
 317     }
 318 }