clippy_lints/src/doc.rs

   1 use rustc::lint::*;
   2 use syntax::ast;
   3 use syntax::codemap::{Span, BytePos};
   4 use utils::span_lint;
   5
   6 /// **What it does:** Checks for the presence of `_`, `::` or camel-case words
   7 /// outside ticks in documentation.
   8 ///
   9 /// **Why is this bad?** *Rustdoc* supports markdown formatting, `_`, `::` and
  10 /// camel-case probably indicate some code which should be included between
  11 /// ticks. `_` can also be used for emphasis in markdown, this lint tries to
  12 /// consider that.
  13 ///
  14 /// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks
  15 /// for is limited, and there are still false positives.
  16 ///
  17 /// **Examples:**
  18 /// ```rust
  19 /// /// Do something with the foo_bar parameter. See also that::other::module::foo.
  20 /// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
  21 /// fn doit(foo_bar) { .. }
  22 /// ```
  23 declare_lint! {
  24     pub DOC_MARKDOWN,
  25     Warn,
  26     "presence of `_`, `::` or camel-case outside backticks in documentation"
  27 }
  28
  29 #[derive(Clone)]
     /// State of the `DOC_MARKDOWN` lint pass.
  30 pub struct Doc {
         /// Words that are never reported: a word is skipped when, after its surrounding
         /// punctuation has been trimmed, it is exactly equal to one of these entries.
  31     valid_idents: Vec<String>,
  32 }
  33
  34 impl Doc {
         /// Creates the lint pass; `valid_idents` is stored as-is and handed to `check_attrs` for
         /// every crate and item visited.
  35     pub fn new(valid_idents: Vec<String>) -> Self {
  36         Doc { valid_idents: valid_idents }
  37     }
  38 }
  39
  40 impl LintPass for Doc {
  41     fn get_lints(&self) -> LintArray {
  42         lint_array![DOC_MARKDOWN]
  43     }
  44 }
  45
  46 impl EarlyLintPass for Doc {
  47     fn check_crate(&mut self, cx: &EarlyContext, krate: &ast::Crate) {
  48         check_attrs(cx, &self.valid_idents, &krate.attrs);
  49     }
  50
  51     fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) {
  52         check_attrs(cx, &self.valid_idents, &item.attrs);
  53     }
  54 }
  55
  56 /// Cleanup documentation decoration (`///` and such).
  57 ///
  58 /// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
  59 /// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we need to keep track of
  60 /// the span but this function is inspired from the later.
  61 #[allow(cast_possible_truncation)]
  62 pub fn strip_doc_comment_decoration((comment, span): (String, Span)) -> Vec<(String, Span)> {
  63     // one-line comments lose their prefix
  64     const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
  65     for prefix in ONELINERS {
  66         if comment.starts_with(*prefix) {
  67             return vec![(comment[prefix.len()..].to_owned(),
  68                          Span { lo: span.lo + BytePos(prefix.len() as u32), ..span })];
  69         }
  70     }
  71
  72     if comment.starts_with("/*") {
  73         return comment[3..comment.len() - 2]
  74             .lines()
  75             .map(|line| {
  76                 let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
  77                 debug_assert_eq!(offset as u32 as usize, offset);
  78
  79                 (line.to_owned(), Span { lo: span.lo + BytePos(offset as u32), ..span })
  80             })
  81             .collect();
  82     }
  83
  84     panic!("not a doc-comment: {}", comment);
  85 }
  86
  87 pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
  88     let mut docs = vec![];
  89
  90     for attr in attrs {
  91         if attr.is_sugared_doc {
  92             if let ast::MetaItemKind::NameValue(ref doc) = attr.value.node {
  93                 if let ast::LitKind::Str(ref doc, _) = doc.node {
  94                     let doc = (*doc.as_str()).to_owned();
  95                     docs.extend_from_slice(&strip_doc_comment_decoration((doc, attr.span)));
  96                 }
  97             }
  98         }
  99     }
 100
 101     if !docs.is_empty() {
 102         let _ = check_doc(cx, valid_idents, &docs);
 103     }
 104 }
 105
 106 #[allow(while_let_loop)] // #362
     /// Scans the documentation lines character by character and passes every run of path
     /// characters found outside titles, inline code, fenced code blocks, link targets and
     /// reference definitions to `check_word`.
     ///
     /// `docs` holds the decoration-free lines together with their spans.
     ///
     /// Returns `Err(())` when the scan gives up before the end of `docs`: an inline code span, a
     /// fenced code block or a link target is opened but its closing character is never found, or
     /// the `]` closing a link text is the last character of its line.
 107 fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(String, Span)]) -> Result<(), ()> {
 108     // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
 109     // There really is no markdown specification that would disambiguate this properly. This is
 110     // what GitHub and Rustdoc do:
 111     //
 112     // foo_bar test_quz    → foo_bar test_quz
 113     // foo_bar_baz         → foo_bar_baz (note that the “official” spec says this should be emphasized)
 114     // _foo bar_ test_quz_ → <em>foo bar</em> test_quz_
 115     // \_foo bar\_         → _foo bar_
 116     // (_baz_)             → (<em>baz</em>)
 117     // foo _ bar _ baz     → foo _ bar _ baz
 118
 119     /// Character that can appear in a path
 120     fn is_path_char(c: char) -> bool {
 121         match c {
 122             t if t.is_alphanumeric() => true,
 123             ':' | '_' => true,
 124             _ => false,
 125         }
 126     }
 127
 128     #[derive(Clone, Debug)]
 129     /// This type is used to iterate through the documentation characters, keeping the span at the
 130     /// same time.
 131     struct Parser<'a> {
 132         /// First byte of the current potential match
 133         current_word_begin: usize,
 134         /// List of lines and their associated span
 135         docs: &'a [(String, Span)],
 136         /// Index of the current line we are parsing
 137         line: usize,
 138         /// Whether we are inside the text of a link (between `[` and `]`)
 139         link: bool,
 140         /// Whether we are at the beginning of a line
 141         new_line: bool,
 142         /// Whether the end of a line was reached the last time `next` was called; the following
             /// call to `next` then moves on to the next line
 143         reset: bool,
 144         /// The position of the current character within the current line
 145         pos: usize,
 146     }
 147
 148     impl<'a> Parser<'a> {
             /// Makes the current position the start of the next potential match.
 149         fn advance_begin(&mut self) {
 150             self.current_word_begin = self.pos;
 151         }
 152
             /// Text and span of the line currently being parsed.
 153         fn line(&self) -> (&'a str, Span) {
 154             let (ref doc, span) = self.docs[self.line];
 155             (doc, span)
 156         }
 157
             /// Next character of the current line, without consuming it; `None` when nothing is
             /// left on this line (following lines are not looked at).
 158         fn peek(&self) -> Option<char> {
 159             self.line().0[self.pos..].chars().next()
 160         }
 161
             /// Consumes characters up to and including the next `n` and starts a new potential
             /// match right after it. Returns whether that `n` was at the beginning of a line, or
             /// `Err(())` if the documentation ends before any `n` is found.
 162         #[allow(while_let_on_iterator)] // borrowck complains about for
 163         fn jump_to(&mut self, n: char) -> Result<bool, ()> {
 164             while let Some((new_line, c)) = self.next() {
 165                 if c == n {
 166                     self.advance_begin();
 167                     return Ok(new_line);
 168                 }
 169             }
 170
 171             Err(())
 172         }
 173
             /// Skips whatever is left of the current line and moves to the start of the next one.
 174         fn next_line(&mut self) {
 175             self.pos = 0;
 176             self.current_word_begin = 0;
 177             self.line += 1;
 178             self.new_line = true;
 179         }
 180
             /// Moves the position back by the UTF-8 length of `c`.
 181         fn put_back(&mut self, c: char) {
 182             self.pos -= c.len_utf8();
 183         }
 184
             /// Text between `current_word_begin` and the current position on the current line,
             /// with the line's span narrowed to that range.
 185         #[allow(cast_possible_truncation)]
 186         fn word(&self) -> (&'a str, Span) {
 187             let begin = self.current_word_begin;
 188             let end = self.pos;
 189
 190             debug_assert_eq!(end as u32 as usize, end);
 191             debug_assert_eq!(begin as u32 as usize, begin);
 192
 193             let (doc, mut span) = self.line();
                 // `hi` must be computed first: it is derived from the still unmodified `lo`.
 194             span.hi = span.lo + BytePos(end as u32);
 195             span.lo = span.lo + BytePos(begin as u32);
 196
 197             (&doc[begin..end], span)
 198         }
 199     }
 200
         /// Yields `(at_beginning_of_line, character)` pairs over all the lines; a `'\n'` is
         /// yielded between two consecutive lines.
 201     impl<'a> Iterator for Parser<'a> {
 202         type Item = (bool, char);
 203
 204         fn next(&mut self) -> Option<(bool, char)> {
 205             while self.line < self.docs.len() {
                     // The previous call hit the end of a line: start reading the next one now.
 206                 if self.reset {
 207                     self.line += 1;
 208                     self.reset = false;
 209                     self.pos = 0;
 210                     self.current_word_begin = 0;
 211                 }
 212
 213                 let mut chars = self.line().0[self.pos..].chars();
 214                 let c = chars.next();
 215
 216                 if let Some(c) = c {
 217                     self.pos += c.len_utf8();
 218                     let new_line = self.new_line;
                         // Leading whitespace does not end the "beginning of line" state.
 219                     self.new_line = c == '\n' || (self.new_line && c.is_whitespace());
 220                     return Some((new_line, c));
 221                 } else if self.line == self.docs.len() - 1 {
                         // End of the last line: the documentation is exhausted.
 222                     return None;
 223                 } else {
                         // End of a line that is not the last one: yield a synthetic `'\n'` and
                         // remember to switch lines on the next call.
 224                     self.new_line = true;
 225                     self.reset = true;
 226                     self.pos += 1;
 227                     return Some((true, '\n'));
 228                 }
 229             }
 230
 231             None
 232         }
 233     }
 234
 235     let mut parser = Parser {
 236         current_word_begin: 0,
 237         docs: docs,
 238         line: 0,
 239         link: false,
 240         new_line: true,
 241         reset: false,
 242         pos: 0,
 243     };
 244
 245     /// Check for fenced code block.
         // Expands to a call returning `Ok(true)` when a fenced block was found and skipped entirely,
         // `Ok(false)` when the current character does not open one, and `Err(())` when a block is
         // opened but the documentation ends before a matching closing fence.
 246     macro_rules! check_block {
 247         ($parser:expr, $c:tt, $new_line:expr) => {{
 248             check_block!($parser, $c, $c, $new_line)
 249         }};
 250
 251         ($parser:expr, $c:pat, $c_expr:expr, $new_line:expr) => {{
 252             fn check_block(parser: &mut Parser, new_line: bool) -> Result<bool, ()> {
 253                 if new_line {
                         // Look ahead on a copy so that nothing is consumed if this is not a fence.
 254                     let mut lookup_parser = parser.clone();
 255                     if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
 256                         *parser = lookup_parser;
 257                         // 3 or more ` or ~ open a code block to be closed with the same number of ` or ~
 258                         let mut open_count = 3;
 259                         while let Some((false, $c)) = parser.next() {
 260                             open_count += 1;
 261                         }
 262
 263                         loop {
                                 // Find the next fence character that starts a line.
 264                             loop {
 265                                 if try!(parser.jump_to($c_expr)) {
 266                                     break;
 267                                 }
 268                             }
 269
 270                             lookup_parser = parser.clone();
 271                             let a = lookup_parser.next();
 272                             let b = lookup_parser.next();
 273                             if let (Some((false, $c)), Some((false, $c))) = (a, b) {
 274                                 let mut close_count = 3;
 275                                 while let Some((false, $c)) = lookup_parser.next() {
 276                                     close_count += 1;
 277                                 }
 278
 279                                 if close_count == open_count {
 280                                     *parser = lookup_parser;
 281                                     return Ok(true);
 282                                 }
 283                             }
 284                         }
 285                     }
 286                 }
 287
 288                 Ok(false)
 289             }
 290
 291             check_block(&mut $parser, $new_line)
 292         }};
 293     }
 294
 295     loop {
 296         match parser.next() {
 297             Some((new_line, c)) => {
 298                 match c {
 299                     '#' if new_line => {
 300                         // don’t warn on titles
 301                         parser.next_line();
 302                     },
 303                     '`' => {
 304                         if try!(check_block!(parser, '`', new_line)) {
 305                             continue;
 306                         }
 307
 308                         try!(parser.jump_to('`')); // not a code block, just inline code
 309                     },
 310                     '~' => {
 311                         if try!(check_block!(parser, '~', new_line)) {
 312                             continue;
 313                         }
 314
 315                         // ~ does not introduce inline code, but two of them introduce
 316                         // strikethrough. Too bad for the consistency but we don't care about
 317                         // strikethrough.
 318                     },
 319                     '[' => {
 320                         // Check for a reference definition `[foo]:` at the beginning of a line
 321                         let mut link = true;
 322
 323                         if new_line {
 324                             let mut lookup_parser = parser.clone();
 325                             if lookup_parser.any(|(_, c)| c == ']') {
 326                                 if let Some((_, ':')) = lookup_parser.next() {
                                         // Reference definition: skip the rest of its line.
 327                                     lookup_parser.next_line();
 328                                     parser = lookup_parser;
 329                                     link = false;
 330                                 }
 331                             }
 332                         }
 333
 334                         parser.advance_begin();
 335                         parser.link = link;
 336                     },
 337                     ']' if parser.link => {
 338                         parser.link = false;
 339
                             // Skip the link target that may follow the link text.
 340                         match parser.peek() {
 341                             Some('(') => {
 342                                 try!(parser.jump_to(')'));
 343                             },
 344                             Some('[') => {
 345                                 try!(parser.jump_to(']'));
 346                             },
 347                             Some(_) => continue,
                                 // Nothing follows `]` on this line: give up on the remaining docs.
 348                             None => return Err(()),
 349                         }
 350                     },
                         // Any other non-path character ends the current potential match.
 351                     c if !is_path_char(c) => {
 352                         parser.advance_begin();
 353                     },
                         // A path character: consume up to the first non-path character, hand it
                         // back, and check the word read so far.
 354                     _ => {
 355                         if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) {
 356                             parser.put_back(c);
 357                         }
 358
 359                         let (word, span) = parser.word();
 360                         check_word(cx, valid_idents, word, span);
 361                         parser.advance_begin();
 362                     },
 363                 }
 364
 365             },
 366             None => break,
 367         }
 368     }
 369
 370     Ok(())
 371 }
 372
 373 fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
 374     /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is
 375     /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
 376     fn is_camel_case(s: &str) -> bool {
 377         if s.starts_with(|c: char| c.is_digit(10)) {
 378             return false;
 379         }
 380
 381         let s = if s.ends_with('s') {
 382             &s[..s.len() - 1]
 383         } else {
 384             s
 385         };
 386
 387         s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
 388         s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
 389     }
 390
 391     fn has_underscore(s: &str) -> bool {
 392         s != "_" && !s.contains("\\_") && s.contains('_')
 393     }
 394
 395     // Trim punctuation as in `some comment (see foo::bar).`
 396     //                                                   ^^
 397     // Or even as in `_foo bar_` which is emphasized.
 398     let word = word.trim_matches(|c: char| !c.is_alphanumeric());
 399
 400     if valid_idents.iter().any(|i| i == word) {
 401         return;
 402     }
 403
 404     if has_underscore(word) || word.contains("::") || is_camel_case(word) {
 405         span_lint(cx,
 406                   DOC_MARKDOWN,
 407                   span,
 408                   &format!("you should put `{}` between ticks in the documentation", word));
 409     }
 410 }