clippy_lints/src/doc.rs

   1 use rustc::lint::*;
   2 use syntax::ast;
   3 use syntax::codemap::{Span, BytePos};
   4 use utils::span_lint;
   5
   6 /// **What it does:** Checks for the presence of `_`, `::` or camel-case words
   7 /// outside ticks in documentation.
   8 ///
   9 /// **Why is this bad?** *Rustdoc* supports markdown formatting; `_`, `::` and
  10 /// camel-case probably indicate some code which should be included between
  11 /// ticks. `_` can also be used for emphasis in markdown; this lint tries to
  12 /// take that into account.
  13 ///
  14 /// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks
  15 /// for is limited, and there are still false positives.
  16 ///
  17 /// **Examples:**
  18 /// ```rust
  19 /// /// Do something with the foo_bar parameter. See also that::other::module::foo.
  20 /// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
  21 /// fn doit(foo_bar) { .. }
  22 /// ```
  23 declare_lint! {
  24     pub DOC_MARKDOWN,
  25     Warn,
  26     "presence of `_`, `::` or camel-case outside backticks in documentation"
  27 }
  28
  29 #[derive(Clone)]
     /// Lint pass for `DOC_MARKDOWN`.
  30 pub struct Doc {
         /// Words that are never reported: `check_word` returns early when the trimmed word equals
         /// one of these entries.
  31     valid_idents: Vec<String>,
  32 }
  33
  34 impl Doc {
  35     pub fn new(valid_idents: Vec<String>) -> Self {
  36         Doc { valid_idents: valid_idents }
  37     }
  38 }
  39
  40 impl LintPass for Doc {
  41     fn get_lints(&self) -> LintArray {
  42         lint_array![DOC_MARKDOWN]
  43     }
  44 }
  45
  46 impl EarlyLintPass for Doc {
  47     fn check_crate(&mut self, cx: &EarlyContext, krate: &ast::Crate) {
  48         check_attrs(cx, &self.valid_idents, &krate.attrs);
  49     }
  50
  51     fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) {
  52         check_attrs(cx, &self.valid_idents, &item.attrs);
  53     }
  54 }
  55
  56 /// Cleanup documentation decoration (`///` and such).
  57 ///
  58 /// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
  59 /// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we need to keep track of
  60 /// the span but this function is inspired from the later.
  61 #[allow(cast_possible_truncation)]
  62 pub fn strip_doc_comment_decoration((comment, span): (&str, Span)) -> Vec<(&str, Span)> {
  63     // one-line comments lose their prefix
  64     const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
  65     for prefix in ONELINERS {
  66         if comment.starts_with(*prefix) {
  67             return vec![(
  68                 &comment[prefix.len()..],
  69                 Span { lo: span.lo + BytePos(prefix.len() as u32), ..span }
  70             )];
  71         }
  72     }
  73
  74     if comment.starts_with("/*") {
  75         return comment[3..comment.len() - 2].lines().map(|line| {
  76             let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
  77             debug_assert_eq!(offset as u32 as usize, offset);
  78
  79             (
  80                 line,
  81                 Span {
  82                     lo: span.lo + BytePos(offset as u32),
  83                     ..span
  84                 }
  85             )
  86         }).collect();
  87     }
  88
  89     panic!("not a doc-comment: {}", comment);
  90 }
  91
  92 pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
  93     let mut docs = vec![];
  94
  95     for attr in attrs {
  96         if attr.node.is_sugared_doc {
  97             if let ast::MetaItemKind::NameValue(_, ref doc) = attr.node.value.node {
  98                 if let ast::LitKind::Str(ref doc, _) = doc.node {
  99                     docs.extend_from_slice(&strip_doc_comment_decoration((doc, attr.span)));
 100                 }
 101             }
 102         }
 103     }
 104
 105     if !docs.is_empty() {
 106         let _ = check_doc(cx, valid_idents, &docs);
 107     }
 108 }
 109
 110 #[allow(while_let_loop)] // #362
     /// Scans `docs` (documentation lines with their spans) and hands every run of path characters
     /// found outside of titles, inline code, fenced code blocks and link targets to `check_word`.
     ///
     /// Returns `Err(())` when the scan is abandoned before the end of the text: a `jump_to` found no
     /// closing character, or the `]` closing a link was the last character of its line.
 111 fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(&str, Span)]) -> Result<(), ()> {
 112     // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
 113     // There really is no markdown specification that would disambiguate this properly. This is
 114     // what GitHub and Rustdoc do:
 115     //
 116     // foo_bar test_quz    → foo_bar test_quz
 117     // foo_bar_baz         → foo_bar_baz (note that the “official” spec says this should be emphasized)
 118     // _foo bar_ test_quz_ → <em>foo bar</em> test_quz_
 119     // \_foo bar\_         → _foo bar_
 120     // (_baz_)             → (<em>baz</em>)
 121     // foo _ bar _ baz     → foo _ bar _ baz
 122
 123     /// Character that can appear in a path
 124     fn is_path_char(c: char) -> bool {
 125         match c {
 126             t if t.is_alphanumeric() => true,
 127             ':' | '_' => true,
 128             _ => false,
 129         }
 130     }
 131
 132     #[derive(Clone, Debug)]
 133     /// This type is used to iterate through the documentation characters, keeping the span at the
 134     /// same time.
 135     struct Parser<'a> {
 136         /// Byte offset, within the current line, of the first byte of the current potential match
 137         current_word_begin: usize,
 138         /// List of lines and their associated span
 139         docs: &'a [(&'a str, Span)],
 140         /// Index of the current line we are parsing
 141         line: usize,
 142         /// Whether we are in a link
 143         link: bool,
 144         /// Whether we are at the beginning of a line
 145         new_line: bool,
 146         /// Whether the last call to `next` reached the end of a line and returned the synthetic `'\n'`
 147         reset: bool,
 148         /// Byte offset, within the current line, of the next character to read
 149         pos: usize,
 150     }
 151
 152     impl<'a> Parser<'a> {
             /// Starts a new potential word at the current position.
 153         fn advance_begin(&mut self) {
 154             self.current_word_begin = self.pos;
 155         }
 156
             /// The line being parsed and its span.
 157         fn line(&self) -> (&'a str, Span) {
 158             self.docs[self.line]
 159         }
 160
             /// The next character of the current line, without consuming it. This never looks into
             /// the following line: it is `None` once the end of the current line is reached.
 161         fn peek(&self) -> Option<char> {
 162             self.line().0[self.pos..].chars().next()
 163         }
 164
             /// Consumes characters up to and including the next `n` and starts a new word right
             /// after it. Returns the `new_line` flag that `next` yielded with that `n`, or `Err(())`
             /// if the documentation ends before any `n` is found.
 165         #[allow(while_let_on_iterator)] // borrowck complains about for
 166         fn jump_to(&mut self, n: char) -> Result<bool, ()> {
 167             while let Some((new_line, c)) = self.next() {
 168                 if c == n {
 169                     self.advance_begin();
 170                     return Ok(new_line);
 171                 }
 172             }
 173
 174             Err(())
 175         }
 176
             /// Moves to the start of the following line, abandoning the rest of the current one.
             /// `line` may end up one past the last line, in which case `next` returns `None`.
 177         fn next_line(&mut self) {
 178             self.pos = 0;
 179             self.current_word_begin = 0;
 180             self.line += 1;
 181             self.new_line = true;
 182         }
 183
             /// Moves the position back by the encoded length of `c`; the caller passes the character
             /// that `next` just returned.
 184         fn put_back(&mut self, c: char) {
 185             self.pos -= c.len_utf8();
 186         }
 187
             /// The text between `current_word_begin` and the current position, together with the
             /// span of the line narrowed to that byte range.
 188         #[allow(cast_possible_truncation)]
 189         fn word(&self) -> (&'a str, Span) {
 190             let begin = self.current_word_begin;
 191             let end = self.pos;
 192
 193             debug_assert_eq!(end as u32 as usize, end);
 194             debug_assert_eq!(begin as u32 as usize, begin);
 195
 196             let (doc, mut span) = self.line();
                 // `hi` must be computed first: both ends are offsets from the original `span.lo`.
 197             span.hi = span.lo + BytePos(end as u32);
 198             span.lo = span.lo + BytePos(begin as u32);
 199
 200             (&doc[begin..end], span)
 201         }
 202     }
 203
 204     impl<'a> Iterator for Parser<'a> {
             /// A character and whether only whitespace precedes it on its line.
 205         type Item = (bool, char);
 206
             /// Yields the characters of every line in order, with a synthetic `'\n'` between two
             /// consecutive lines (none after the last one).
 207         fn next(&mut self) -> Option<(bool, char)> {
 208             while self.line < self.docs.len() {
 209                 if self.reset {
                         // The previous call hit the end of a line: restart at the following one.
 210                     self.line += 1;
 211                     self.reset = false;
 212                     self.pos = 0;
 213                     self.current_word_begin = 0;
 214                 }
 215
 216                 let mut chars = self.line().0[self.pos..].chars();
 217                 let c = chars.next();
 218
 219                 if let Some(c) = c {
 220                     self.pos += c.len_utf8();
 221                     let new_line = self.new_line;
                         // Leading whitespace does not end the "beginning of line" state.
 222                     self.new_line = c == '\n' || (self.new_line && c.is_whitespace());
 223                     return Some((new_line, c));
 224                 } else if self.line == self.docs.len() - 1 {
                         // End of the last line: the documentation is exhausted.
 225                     return None;
 226                 } else {
                         // End of a line that is not the last one: report a line break and switch
                         // lines on the next call. `pos` ends up one past the end of the line, so a
                         // `put_back('\n')` brings it back to the end of the line.
 227                     self.new_line = true;
 228                     self.reset = true;
 229                     self.pos += 1;
 230                     return Some((true, '\n'));
 231                 }
 232             }
 233
 234             None
 235         }
 236     }
 237
 238     let mut parser = Parser {
 239         current_word_begin: 0,
 240         docs: docs,
 241         line: 0,
 242         link: false,
 243         new_line: true,
 244         reset: false,
 245         pos: 0,
 246     };
 247
 248     /// Check for fenced code block.
         // The macro evaluates to `Ok(true)` when a fenced block was found and skipped up to its
         // closing fence, to `Ok(false)` when there is no fence at this position, and to `Err(())`
         // when the documentation ends before the closing fence.
 249     macro_rules! check_block {
             // Entry point: the fence character literal is forwarded both as a pattern and as an
             // expression.
 250         ($parser:expr, $c:tt, $new_line:expr) => {{
 251             check_block!($parser, $c, $c, $new_line)
 252         }};
 253
 254         ($parser:expr, $c:pat, $c_expr:expr, $new_line:expr) => {{
 255             fn check_block(parser: &mut Parser, new_line: bool) -> Result<bool, ()> {
 256                 if new_line {
                         // The caller already consumed the first fence character; look ahead for two
                         // more on a copy so that nothing is consumed if this is not a fence.
 257                     let mut lookup_parser = parser.clone();
 258                     if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
 259                         *parser = lookup_parser;
 260                         // 3 or more ` or ~ open a code block to be closed with the same number of ` or ~
 261                         let mut open_count = 3;
 262                         while let Some((false, $c)) = parser.next() {
 263                             open_count += 1;
 264                         }
 265
 266                         loop {
                                 // Skip to the next fence character that starts a line.
 267                             loop {
 268                                 if try!(parser.jump_to($c_expr)) {
 269                                     break;
 270                                 }
 271                             }
 272
                                 // Count the run on a copy: a run of another length does not close
                                 // the block and the search resumes right after its first character.
 273                             lookup_parser = parser.clone();
 274                             if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
 275                                 let mut close_count = 3;
 276                                 while let Some((false, $c)) = lookup_parser.next() {
 277                                     close_count += 1;
 278                                 }
 279
 280                                 if close_count == open_count {
 281                                     *parser = lookup_parser;
 282                                     return Ok(true);
 283                                 }
 284                             }
 285                         }
 286                     }
 287                 }
 288
 289                 Ok(false)
 290             }
 291
 292             check_block(&mut $parser, $new_line)
 293         }};
 294     }
 295
 296     loop {
 297         match parser.next() {
 298             Some((new_line, c)) => {
 299                 match c {
 300                     '#' if new_line => { // don’t warn on titles
 301                         parser.next_line();
 302                     }
 303                     '`' => {
 304                         if try!(check_block!(parser, '`', new_line)) {
 305                             continue;
 306                         }
 307
 308                         try!(parser.jump_to('`')); // not a code block, just inline code
 309                     }
 310                     '~' => {
 311                         if try!(check_block!(parser, '~', new_line)) {
 312                             continue;
 313                         }
 314
 315                         // ~ does not introduce inline code, but two of them introduce
 316                         // strikethrough. Too bad for the consistency but we don't care about
 317                         // strikethrough.
 318                     }
 319                     '[' => {
 320                         // Check for a reference definition `[foo]:` at the beginning of a line
 321                         let mut link = true;
 322
 323                         if new_line {
 324                             let mut lookup_parser = parser.clone();
 325                             if lookup_parser.any(|(_, c)| c == ']') {
 326                                 if let Some((_, ':')) = lookup_parser.next() {
                                         // A definition: skip the rest of the line that holds the `:`.
 327                                     lookup_parser.next_line();
 328                                     parser = lookup_parser;
 329                                     link = false;
 330                                 }
 331                             }
 332                         }
 333
 334                         parser.advance_begin();
 335                         parser.link = link;
 336                     }
 337                     ']' if parser.link => {
                             // End of the link text: skip the `(target)` or `[reference]` that
                             // directly follows it, if any.
 338                         parser.link = false;
 339
 340                         match parser.peek() {
 341                             Some('(') => {
 342                                 try!(parser.jump_to(')'));
 343                             }
 344                             Some('[') => {
 345                                 try!(parser.jump_to(']'));
 346                             }
 347                             Some(_) => continue,
 348                             None => return Err(()),
 349                         }
 350                     }
 351                     c if !is_path_char(c) => {
                             // Not part of a word: a word can only start after this character.
 352                         parser.advance_begin();
 353                     }
 354                     _ => {
                             // First character of a word: consume the path characters that follow,
                             // give back the character that ended the run, then check the word.
 355                         if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) {
 356                             parser.put_back(c);
 357                         }
 358
 359                         let (word, span) = parser.word();
 360                         check_word(cx, valid_idents, word, span);
 361                         parser.advance_begin();
 362                     }
 363                 }
 364
 365             }
 366             None => break,
 367         }
 368     }
 369
 370     Ok(())
 371 }
 372
 373 fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
 374     /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is
 375     /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
 376     fn is_camel_case(s: &str) -> bool {
 377         if s.starts_with(|c: char| c.is_digit(10)) {
 378             return false;
 379         }
 380
 381         let s = if s.ends_with('s') {
 382             &s[..s.len() - 1]
 383         } else {
 384             s
 385         };
 386
 387         s.chars().all(char::is_alphanumeric) &&
 388         s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
 389         s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
 390     }
 391
 392     fn has_underscore(s: &str) -> bool {
 393         s != "_" && !s.contains("\\_") && s.contains('_')
 394     }
 395
 396     // Trim punctuation as in `some comment (see foo::bar).`
 397     //                                                   ^^
 398     // Or even as in `_foo bar_` which is emphasized.
 399     let word = word.trim_matches(|c: char| !c.is_alphanumeric());
 400
 401     if valid_idents.iter().any(|i| i == word) {
 402         return;
 403     }
 404
 405     if has_underscore(word) || word.contains("::") || is_camel_case(word) {
 406         span_lint(cx,
 407                   DOC_MARKDOWN,
 408                   span,
 409                   &format!("you should put `{}` between ticks in the documentation", word));
 410     }
 411 }