clippy_lints/src/doc.rs

   1 use rustc::lint::*;
   2 use syntax::ast;
   3 use syntax::codemap::{Span, BytePos};
   4 use utils::span_lint;
   5
/// **What it does:** Checks for the presence of `_`, `::` or camel-case words
/// outside ticks in documentation.
///
/// **Why is this bad?** *Rustdoc* supports markdown formatting; `_`, `::` and
/// camel-case probably indicate some code which should be included between
/// ticks. `_` can also be used for emphasis in markdown; this lint tries to
/// consider that.
///
/// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks
/// for is limited, and there are still false positives.
///
/// **Examples:**
/// ```rust
/// /// Do something with the foo_bar parameter. See also that::other::module::foo.
/// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
/// fn doit(foo_bar) { .. }
/// ```
declare_lint! {
    pub DOC_MARKDOWN,
    Warn,
    "presence of `_`, `::` or camel-case outside backticks in documentation"
}
  28
/// Lint pass that emits `DOC_MARKDOWN`.
#[derive(Clone)]
pub struct Doc {
    /// Words that are never reported, even when they look like code.
    valid_idents: Vec<String>,
}
  33
impl Doc {
    /// Creates the pass.
    ///
    /// `valid_idents` is the list of words that must never be reported; it is stored as is.
    pub fn new(valid_idents: Vec<String>) -> Self {
        Doc { valid_idents: valid_idents }
    }
}
  39
impl LintPass for Doc {
    /// Lists the lints this pass can emit: only `DOC_MARKDOWN`.
    fn get_lints(&self) -> LintArray {
        lint_array![DOC_MARKDOWN]
    }
}
  45
impl EarlyLintPass for Doc {
    /// Runs `check_attrs` on the attributes of the crate itself.
    fn check_crate(&mut self, cx: &EarlyContext, krate: &ast::Crate) {
        check_attrs(cx, &self.valid_idents, &krate.attrs);
    }

    /// Runs `check_attrs` on the attributes of `item`.
    fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) {
        check_attrs(cx, &self.valid_idents, &item.attrs);
    }
}
  55
  56 /// Cleanup documentation decoration (`///` and such).
  57 ///
  58 /// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
  59 /// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we need to keep track of
  60 /// the span but this function is inspired from the later.
  61 #[allow(cast_possible_truncation)]
  62 pub fn strip_doc_comment_decoration((comment, span): (String, Span)) -> Vec<(String, Span)> {
  63     // one-line comments lose their prefix
  64     const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
  65     for prefix in ONELINERS {
  66         if comment.starts_with(*prefix) {
  67             return vec![(
  68                 comment[prefix.len()..].to_owned(),
  69                 Span { lo: span.lo + BytePos(prefix.len() as u32), ..span }
  70             )];
  71         }
  72     }
  73
  74     if comment.starts_with("/*") {
  75         return comment[3..comment.len() - 2].lines().map(|line| {
  76             let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
  77             debug_assert_eq!(offset as u32 as usize, offset);
  78
  79             (
  80                 line.to_owned(),
  81                 Span {
  82                     lo: span.lo + BytePos(offset as u32),
  83                     ..span
  84                 }
  85             )
  86         }).collect();
  87     }
  88
  89     panic!("not a doc-comment: {}", comment);
  90 }
  91
  92 pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
  93     let mut docs = vec![];
  94
  95     for attr in attrs {
  96         if attr.is_sugared_doc {
  97             if let ast::MetaItemKind::NameValue(ref doc) = attr.value.node {
  98                 if let ast::LitKind::Str(ref doc, _) = doc.node {
  99                     let doc = (*doc.as_str()).to_owned();
 100                     docs.extend_from_slice(&strip_doc_comment_decoration((doc, attr.span)));
 101                 }
 102             }
 103         }
 104     }
 105
 106     if !docs.is_empty() {
 107         let _ = check_doc(cx, valid_idents, &docs);
 108     }
 109 }
 110
/// Scans the documentation lines in `docs` and passes every path-like word it finds to
/// `check_word`.
///
/// `docs` holds the text of each documentation line together with the span of that text. The
/// following parts are skipped: the rest of a line after a `#` found at the beginning of a line,
/// fenced code blocks, inline code, the line of a reference definition (`[foo]: …`) and the `(…)`
/// or `[…]` that directly follows the text of a link.
///
/// Returns `Err(())` when the scan stops early: a delimiter that had to be skipped to (closing
/// tick, closing fence, end of a link target) was never found, or the `]` closing a link was the
/// last character of its line.
#[allow(while_let_loop)] // #362
fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(String, Span)]) -> Result<(), ()> {
    // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
    // There really is no markdown specification that would disambiguate this properly. This is
    // what GitHub and Rustdoc do:
    //
    // foo_bar test_quz    → foo_bar test_quz
    // foo_bar_baz         → foo_bar_baz (note that the “official” spec says this should be emphasized)
    // _foo bar_ test_quz_ → <em>foo bar</em> test_quz_
    // \_foo bar\_         → _foo bar_
    // (_baz_)             → (<em>baz</em>)
    // foo _ bar _ baz     → foo _ bar _ baz

    /// Character that can appear in a path: any alphanumeric character, `:` or `_`.
    fn is_path_char(c: char) -> bool {
        match c {
            t if t.is_alphanumeric() => true,
            ':' | '_' => true,
            _ => false,
        }
    }

    #[derive(Clone, Debug)]
    /// This type is used to iterate through the documentation characters, keeping the span at the
    /// same time.
    ///
    /// It is cheap to clone, which is how look-ahead is done: a clone is advanced and only
    /// assigned back to the real parser when the look-ahead matched.
    struct Parser<'a> {
        /// First byte of the current potential match
        current_word_begin: usize,
        /// List of lines and their associated span
        docs: &'a [(String, Span)],
        /// Index of the current line we are parsing
        line: usize,
        /// Whether we are in a link
        link: bool,
        /// Whether we are at the beginning of a line
        new_line: bool,
        /// Whether we reached the end of a line the last time `next` was called
        reset: bool,
        /// The position of the current character within the current line
        pos: usize,
    }

    impl<'a> Parser<'a> {
        /// Makes the current position the start of the next potential word.
        fn advance_begin(&mut self) {
            self.current_word_begin = self.pos;
        }

        /// Returns the text and the span of the line being parsed.
        fn line(&self) -> (&'a str, Span) {
            let (ref doc, span) = self.docs[self.line];
            (doc, span)
        }

        /// Returns the next character of the current line without consuming it, or `None` at the
        /// end of the line. It never looks into the following line.
        fn peek(&self) -> Option<char> {
            self.line().0[self.pos..].chars().next()
        }

        /// Consumes characters up to and including the first `n` and starts a new potential word
        /// right after it.
        ///
        /// Returns whether that `n` was at the beginning of a line, or `Err(())` if the
        /// documentation ended before any `n` was found.
        #[allow(while_let_on_iterator)] // borrowck complains about for
        fn jump_to(&mut self, n: char) -> Result<bool, ()> {
            while let Some((new_line, c)) = self.next() {
                if c == n {
                    self.advance_begin();
                    return Ok(new_line);
                }
            }

            Err(())
        }

        /// Skips whatever remains of the current line and moves to the start of the next one.
        fn next_line(&mut self) {
            self.pos = 0;
            self.current_word_begin = 0;
            self.line += 1;
            self.new_line = true;
        }

        /// Moves the position back by the encoded length of `c`, which should be the character
        /// that was just returned by `next`.
        fn put_back(&mut self, c: char) {
            self.pos -= c.len_utf8();
        }

        /// Returns the text between the start of the current potential word and the current
        /// position, along with the span covering exactly that text.
        #[allow(cast_possible_truncation)]
        fn word(&self) -> (&'a str, Span) {
            let begin = self.current_word_begin;
            let end = self.pos;

            debug_assert_eq!(end as u32 as usize, end);
            debug_assert_eq!(begin as u32 as usize, begin);

            // `hi` must be computed first: both ends are offsets from the original `lo`.
            let (doc, mut span) = self.line();
            span.hi = span.lo + BytePos(end as u32);
            span.lo = span.lo + BytePos(begin as u32);

            (&doc[begin..end], span)
        }
    }

    impl<'a> Iterator for Parser<'a> {
        /// A character, preceded by whether it is at the beginning of a line (only whitespace, if
        /// anything, comes before it on that line).
        type Item = (bool, char);

        /// Yields the next character of the documentation.
        ///
        /// A `'\n'` is yielded between two consecutive lines of `docs`; none is yielded after
        /// the last line.
        fn next(&mut self) -> Option<(bool, char)> {
            while self.line < self.docs.len() {
                // The previous call hit the end of a line: actually move to the next line now.
                if self.reset {
                    self.line += 1;
                    self.reset = false;
                    self.pos = 0;
                    self.current_word_begin = 0;
                }

                let mut chars = self.line().0[self.pos..].chars();
                let c = chars.next();

                if let Some(c) = c {
                    self.pos += c.len_utf8();
                    let new_line = self.new_line;
                    // Leading whitespace does not end the "beginning of the line".
                    self.new_line = c == '\n' || (self.new_line && c.is_whitespace());
                    return Some((new_line, c));
                } else if self.line == self.docs.len() - 1 {
                    return None;
                } else {
                    self.new_line = true;
                    self.reset = true;
                    // Account for the line break that is not part of the line's text; presumably
                    // so that `put_back('\n')` lands exactly on the end of the line.
                    self.pos += 1;
                    return Some((true, '\n'));
                }
            }

            None
        }
    }

    let mut parser = Parser {
        current_word_begin: 0,
        docs: docs,
        line: 0,
        link: false,
        new_line: true,
        reset: false,
        pos: 0,
    };

    /// Check for fenced code block.
    ///
    /// To be called right after the fence character `$c` was read, `$new_line` telling whether
    /// that character was at the beginning of a line. Evaluates to `Ok(true)` if a whole fenced
    /// block was skipped, `Ok(false)` if no fence opens here (the parser is then left untouched)
    /// and `Err(())` if a fence opens but the documentation ends before it is closed.
    macro_rules! check_block {
        // Shorthand: the same character literal is used both as a pattern and as an expression.
        ($parser:expr, $c:tt, $new_line:expr) => {{
            check_block!($parser, $c, $c, $new_line)
        }};

        ($parser:expr, $c:pat, $c_expr:expr, $new_line:expr) => {{
            fn check_block(parser: &mut Parser, new_line: bool) -> Result<bool, ()> {
                if new_line {
                    // One fence character was already consumed by the caller, look for two more.
                    let mut lookup_parser = parser.clone();
                    if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
                        *parser = lookup_parser;
                        // 3 or more ` or ~ open a code block to be closed with the same number of ` or ~
                        let mut open_count = 3;
                        // Note that the first character that is not part of the fence is consumed
                        // as well.
                        while let Some((false, $c)) = parser.next() {
                            open_count += 1;
                        }

                        loop {
                            // Skip to the next fence character found at the beginning of a line.
                            loop {
                                if try!(parser.jump_to($c_expr)) {
                                    break;
                                }
                            }

                            // Count the characters of this candidate closing fence on a clone, so
                            // that the search resumes from here if the length does not match.
                            lookup_parser = parser.clone();
                            if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
                                let mut close_count = 3;
                                while let Some((false, $c)) = lookup_parser.next() {
                                    close_count += 1;
                                }

                                if close_count == open_count {
                                    *parser = lookup_parser;
                                    return Ok(true);
                                }
                            }
                        }
                    }
                }

                Ok(false)
            }

            check_block(&mut $parser, $new_line)
        }};
    }

    loop {
        match parser.next() {
            Some((new_line, c)) => {
                match c {
                    '#' if new_line => { // don’t warn on titles
                        parser.next_line();
                    }
                    '`' => {
                        if try!(check_block!(parser, '`', new_line)) {
                            continue;
                        }

                        try!(parser.jump_to('`')); // not a code block, just inline code
                    }
                    '~' => {
                        if try!(check_block!(parser, '~', new_line)) {
                            continue;
                        }

                        // ~ does not introduce inline code, but two of them introduce
                        // strikethrough. Too bad for the consistency but we don't care about
                        // strikethrough.
                    }
                    '[' => {
                        // Check for a reference definition `[foo]:` at the beginning of a line
                        let mut link = true;

                        if new_line {
                            let mut lookup_parser = parser.clone();
                            if lookup_parser.any(|(_, c)| c == ']') {
                                if let Some((_, ':')) = lookup_parser.next() {
                                    // It is one: skip the rest of its line.
                                    lookup_parser.next_line();
                                    parser = lookup_parser;
                                    link = false;
                                }
                            }
                        }

                        // Otherwise this opens the text of a link. That text is still checked,
                        // only what follows the closing `]` may be skipped.
                        parser.advance_begin();
                        parser.link = link;
                    }
                    ']' if parser.link => {
                        parser.link = false;

                        // Skip the target of the link, if any, whether inline `(…)` or
                        // reference-style `[…]`.
                        match parser.peek() {
                            Some('(') => {
                                try!(parser.jump_to(')'));
                            }
                            Some('[') => {
                                try!(parser.jump_to(']'));
                            }
                            Some(_) => continue,
                            // `peek` does not look past the end of the line: give up.
                            None => return Err(()),
                        }
                    }
                    c if !is_path_char(c) => {
                        parser.advance_begin();
                    }
                    _ => {
                        // First character of a word: consume the rest of it. `find` also consumes
                        // the character that ends the word, so give that one back.
                        if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) {
                            parser.put_back(c);
                        }

                        let (word, span) = parser.word();
                        check_word(cx, valid_idents, word, span);
                        parser.advance_begin();
                    }
                }

            }
            None => break,
        }
    }

    Ok(())
}
 374
 375 fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
 376     /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is
 377     /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
 378     fn is_camel_case(s: &str) -> bool {
 379         if s.starts_with(|c: char| c.is_digit(10)) {
 380             return false;
 381         }
 382
 383         let s = if s.ends_with('s') {
 384             &s[..s.len() - 1]
 385         } else {
 386             s
 387         };
 388
 389         s.chars().all(char::is_alphanumeric) &&
 390         s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
 391         s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
 392     }
 393
 394     fn has_underscore(s: &str) -> bool {
 395         s != "_" && !s.contains("\\_") && s.contains('_')
 396     }
 397
 398     // Trim punctuation as in `some comment (see foo::bar).`
 399     //                                                   ^^
 400     // Or even as in `_foo bar_` which is emphasized.
 401     let word = word.trim_matches(|c: char| !c.is_alphanumeric());
 402
 403     if valid_idents.iter().any(|i| i == word) {
 404         return;
 405     }
 406
 407     if has_underscore(word) || word.contains("::") || is_camel_case(word) {
 408         span_lint(cx,
 409                   DOC_MARKDOWN,
 410                   span,
 411                   &format!("you should put `{}` between ticks in the documentation", word));
 412     }
 413 }