clippy_lints/src/doc.rs

   1 use rustc::lint::*;
   2 use syntax::ast;
   3 use syntax::codemap::{Span, BytePos};
   4 use utils::span_lint;
   5
   6 /// **What it does:** Checks for the presence of `_`, `::` or camel-case words
   7 /// outside ticks in documentation.
   8 ///
   9 /// **Why is this bad?** *Rustdoc* supports markdown formatting; `_`, `::` and
  10 /// camel-case probably indicate some code which should be included between
  11 /// ticks. `_` can also be used for emphasis in markdown; this lint tries to
  12 /// take that into account.
  13 ///
  14 /// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks
  15 /// for is limited, and there are still false positives.
  16 ///
  17 /// **Examples:**
  18 /// ```rust
  19 /// /// Do something with the foo_bar parameter. See also that::other::module::foo.
  20 /// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
  21 /// fn doit(foo_bar) { .. }
  22 /// ```
  23 declare_lint! {
  24     pub DOC_MARKDOWN,
  25     Warn,
  26     "presence of `_`, `::` or camel-case outside backticks in documentation"
  27 }
  28
  29 #[derive(Clone)]
     /// State of the lint pass that reports `DOC_MARKDOWN`.
  30 pub struct Doc {
         /// Words that are never reported; `check_word` compares them with the trimmed word by
         /// exact string equality.
  31     valid_idents: Vec<String>,
  32 }
  33
  34 impl Doc {
         /// Creates the pass, storing `valid_idents` as the list of words that must not be reported.
  35     pub fn new(valid_idents: Vec<String>) -> Self {
  36         Doc { valid_idents: valid_idents }
  37     }
  38 }
  39
  40 impl LintPass for Doc {
         /// Declares `DOC_MARKDOWN` as the single lint associated with this pass.
  41     fn get_lints(&self) -> LintArray {
  42         lint_array![DOC_MARKDOWN]
  43     }
  44 }
  45
  46 impl EarlyLintPass for Doc {
         /// Checks the attributes attached to the crate itself.
  47     fn check_crate(&mut self, cx: &EarlyContext, krate: &ast::Crate) {
  48         check_attrs(cx, &self.valid_idents, &krate.attrs);
  49     }
  50
         /// Checks the attributes attached to a single item.
  51     fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) {
  52         check_attrs(cx, &self.valid_idents, &item.attrs);
  53     }
  54 }
  55
  56 /// Cleanup documentation decoration (`///` and such).
  57 ///
  58 /// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
  59 /// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we need to keep track of
  60 /// the span but this function is inspired from the later.
  61 #[allow(cast_possible_truncation)]
  62 pub fn strip_doc_comment_decoration((comment, span): (String, Span)) -> Vec<(String, Span)> {
  63     // one-line comments lose their prefix
  64     const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
  65     for prefix in ONELINERS {
  66         if comment.starts_with(*prefix) {
  67             return vec![(comment[prefix.len()..].to_owned(),
  68                          Span { lo: span.lo + BytePos(prefix.len() as u32), ..span })];
  69         }
  70     }
  71
  72     if comment.starts_with("/*") {
  73         return comment[3..comment.len() - 2]
  74             .lines()
  75             .map(|line| {
  76                 let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
  77                 debug_assert_eq!(offset as u32 as usize, offset);
  78
  79                 (line.to_owned(), Span { lo: span.lo + BytePos(offset as u32), ..span })
  80             })
  81             .collect();
  82     }
  83
  84     panic!("not a doc-comment: {}", comment);
  85 }
  86
  87 pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
  88     let mut docs = vec![];
  89
  90     for attr in attrs {
  91         if attr.is_sugared_doc {
  92             if let Some(ref doc) = attr.value_str() {
  93                 let doc = doc.to_string();
  94                 docs.extend_from_slice(&strip_doc_comment_decoration((doc, attr.span)));
  95             }
  96         }
  97     }
  98
  99     if !docs.is_empty() {
 100         let _ = check_doc(cx, valid_idents, &docs);
 101     }
 102 }
 103
 104 #[allow(while_let_loop)] // #362
     /// Walks `docs` character by character and passes every run of path characters to `check_word`.
     ///
     /// Backtick spans, fenced code blocks, title lines, reference definitions (`[foo]:`) and link
     /// targets are skipped over. Returns `Err(())` when a closing delimiter being searched for (or
     /// the character following a link's `]`) cannot be found, and `Ok(())` once `docs` is exhausted.
 105 fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(String, Span)]) -> Result<(), ()> {
 106     // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
 107     // There really is no markdown specification that would disambiguate this properly. This is
 108     // what GitHub and Rustdoc do:
 109     //
 110     // foo_bar test_quz    → foo_bar test_quz
 111     // foo_bar_baz         → foo_bar_baz (note that the “official” spec says this should be emphasized)
 112     // _foo bar_ test_quz_ → <em>foo bar</em> test_quz_
 113     // \_foo bar\_         → _foo bar_
 114     // (_baz_)             → (<em>baz</em>)
 115     // foo _ bar _ baz     → foo _ bar _ baz
 116
 117     /// Character that can appear in a path
 118     fn is_path_char(c: char) -> bool {
 119         match c {
 120             t if t.is_alphanumeric() => true,
 121             ':' | '_' => true,
 122             _ => false,
 123         }
 124     }
 125
 126     #[derive(Clone, Debug)]
 127     /// This type is used to iterate through the documentation characters, keeping the span at the
 128     /// same time.
 129     struct Parser<'a> {
 130         /// First byte of the current potential match
 131         current_word_begin: usize,
 132         /// List of lines and their associated span
 133         docs: &'a [(String, Span)],
 134         /// Index of the current line we are parsing
 135         line: usize,
 136         /// Whether we are in a link
 137         link: bool,
 138         /// Whether we are at the beginning of a line
 139         new_line: bool,
 140         /// Whether we were to the end of a line last time `next` was called
 141         reset: bool,
 142         /// The position of the current character within the current line
 143         pos: usize,
 144     }
 145
 146     impl<'a> Parser<'a> {
             /// Starts a new potential match at the current position.
 147         fn advance_begin(&mut self) {
 148             self.current_word_begin = self.pos;
 149         }
 150
             /// Text and span of the line currently being parsed.
 151         fn line(&self) -> (&'a str, Span) {
 152             let (ref doc, span) = self.docs[self.line];
 153             (doc, span)
 154         }
 155
             /// Next character of the current line, if any, without consuming it.
 156         fn peek(&self) -> Option<char> {
 157             self.line().0[self.pos..].chars().next()
 158         }
 159
             /// Consumes characters up to and including the next `n` and restarts the potential match
             /// right after it. Returns the `new_line` flag that `next` reported for that `n`, or
             /// `Err(())` if the input ends before an `n` is found.
 160         #[allow(while_let_on_iterator)] // borrowck complains about for
 161         fn jump_to(&mut self, n: char) -> Result<bool, ()> {
 162             while let Some((new_line, c)) = self.next() {
 163                 if c == n {
 164                     self.advance_begin();
 165                     return Ok(new_line);
 166                 }
 167             }
 168
 169             Err(())
 170         }
 171
             /// Skips what remains of the current line and positions the parser at the start of the
             /// following one.
 172         fn next_line(&mut self) {
 173             self.pos = 0;
 174             self.current_word_begin = 0;
 175             self.line += 1;
 176             self.new_line = true;
 177         }
 178
             /// Moves the position back by the UTF-8 length of `c`.
 179         fn put_back(&mut self, c: char) {
 180             self.pos -= c.len_utf8();
 181         }
 182
             /// Current potential match (`current_word_begin..pos` of the current line) together with
             /// the span of exactly those bytes.
 183         #[allow(cast_possible_truncation)]
 184         fn word(&self) -> (&'a str, Span) {
 185             let begin = self.current_word_begin;
 186             let end = self.pos;
 187
 188             debug_assert_eq!(end as u32 as usize, end);
 189             debug_assert_eq!(begin as u32 as usize, begin);
 190
 191             let (doc, mut span) = self.line();
                 // `hi` is computed first because it is relative to the line's original `lo`.
 192             span.hi = span.lo + BytePos(end as u32);
 193             span.lo = span.lo + BytePos(begin as u32);
 194
 195             (&doc[begin..end], span)
 196         }
 197     }
 198
 199     impl<'a> Iterator for Parser<'a> {
             /// The `new_line` flag as it was before the character was read, and the character.
 200         type Item = (bool, char);
 201
 202         fn next(&mut self) -> Option<(bool, char)> {
 203             if self.line < self.docs.len() {
                     // The previous call reported the end of a line: move on to the next line now.
 204                 if self.reset {
 205                     self.line += 1;
 206                     self.reset = false;
 207                     self.pos = 0;
 208                     self.current_word_begin = 0;
 209                 }
 210
 211                 let mut chars = self.line().0[self.pos..].chars();
 212                 let c = chars.next();
 213
 214                 if let Some(c) = c {
 215                     self.pos += c.len_utf8();
 216                     let new_line = self.new_line;
                         // Still at the beginning of the line as long as only whitespace was read.
 217                     self.new_line = c == '\n' || (self.new_line && c.is_whitespace());
 218                     Some((new_line, c))
 219                 } else if self.line == self.docs.len() - 1 {
                         // End of the last line: nothing left to read.
 220                     None
 221                 } else {
                         // End of a line that is not the last one: report a `'\n'` in its place. `pos` is
                         // advanced by one byte, presumably so that `put_back` also works on this character.
 222                     self.new_line = true;
 223                     self.reset = true;
 224                     self.pos += 1;
 225                     Some((true, '\n'))
 226                 }
 227             } else {
 228                 None
 229             }
 230         }
 231     }
 232
 233     let mut parser = Parser {
 234         current_word_begin: 0,
 235         docs: docs,
 236         line: 0,
 237         link: false,
 238         new_line: true,
 239         reset: false,
 240         pos: 0,
 241     };
 242
 243     /// Check for fenced code block.
 244     macro_rules! check_block {
 245         ($parser:expr, $c:tt, $new_line:expr) => {{
 246             check_block!($parser, $c, $c, $new_line)
 247         }};
 248
 249         ($parser:expr, $c:pat, $c_expr:expr, $new_line:expr) => {{
                 // Returns `Ok(true)` when a whole fenced block was consumed, `Ok(false)` when the
                 // parser is not at a fence, and `Err(())` when the input ends inside the block.
 250             fn check_block(parser: &mut Parser, new_line: bool) -> Result<bool, ()> {
 251                 if new_line {
                         // One fence character was already read by the caller; look ahead on a copy
                         // of the parser for two more before committing.
 252                     let mut lookup_parser = parser.clone();
 253                     if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
 254                         *parser = lookup_parser;
 255                         // 3 or more ` or ~ open a code block to be closed with the same number of ` or ~
 256                         let mut open_count = 3;
 257                         while let Some((false, $c)) = parser.next() {
 258                             open_count += 1;
 259                         }
 260
 261                         loop {
                                 // Find the next fence character that starts a line.
 262                             loop {
 263                                 if try!(parser.jump_to($c_expr)) {
 264                                     break;
 265                                 }
 266                             }
 267
 268                             lookup_parser = parser.clone();
 269                             let a = lookup_parser.next();
 270                             let b = lookup_parser.next();
 271                             if let (Some((false, $c)), Some((false, $c))) = (a, b) {
 272                                 let mut close_count = 3;
 273                                 while let Some((false, $c)) = lookup_parser.next() {
 274                                     close_count += 1;
 275                                 }
 276
                                     // A fence of a different length does not close the block.
 277                                 if close_count == open_count {
 278                                     *parser = lookup_parser;
 279                                     return Ok(true);
 280                                 }
 281                             }
 282                         }
 283                     }
 284                 }
 285
 286                 Ok(false)
 287             }
 288
 289             check_block(&mut $parser, $new_line)
 290         }};
 291     }
 292
 293     loop {
 294         match parser.next() {
 295             Some((new_line, c)) => {
 296                 match c {
 297                     '#' if new_line => {
 298                         // don’t warn on titles
 299                         parser.next_line();
 300                     },
 301                     '`' => {
 302                         if try!(check_block!(parser, '`', new_line)) {
 303                             continue;
 304                         }
 305
 306                         // not a code block, just inline code
 307                         try!(parser.jump_to('`'));
 308                     },
 309                     '~' => {
 310                         if try!(check_block!(parser, '~', new_line)) {
 311                             continue;
 312                         }
 313
 314                         // ~ does not introduce inline code, but two of them introduce
 315                         // strikethrough. Too bad for the consistency but we don't care about
 316                         // strikethrough.
 317                     },
 318                     '[' => {
 319                         // Check for a reference definition `[foo]:` at the beginning of a line
 320                         let mut link = true;
 321
 322                         if new_line {
 323                             let mut lookup_parser = parser.clone();
 324                             if lookup_parser.any(|(_, c)| c == ']') {
 325                                 if let Some((_, ':')) = lookup_parser.next() {
                                         // Reference definition: skip the rest of its line.
 326                                     lookup_parser.next_line();
 327                                     parser = lookup_parser;
 328                                     link = false;
 329                                 }
 330                             }
 331                         }
 332
 333                         parser.advance_begin();
 334                         parser.link = link;
 335                     },
 336                     ']' if parser.link => {
                             // End of the link text: skip an inline target `(..)` or a label `[..]`.
 337                         parser.link = false;
 338
 339                         match parser.peek() {
 340                             Some('(') => {
 341                                 try!(parser.jump_to(')'));
 342                             },
 343                             Some('[') => {
 344                                 try!(parser.jump_to(']'));
 345                             },
 346                             Some(_) => continue,
                                 // NOTE(review): `peek` only looks at the current line, so a `]` that ends a
                                 // line stops the whole scan here even when more lines follow — confirm that
                                 // this is intended.
 347                             None => return Err(()),
 348                         }
 349                     },
 350                     c if !is_path_char(c) => {
                             // Separator: the next potential match starts after it.
 351                         parser.advance_begin();
 352                     },
 353                     _ => {
                             // Inside a word: read up to the next non-path character, give that character
                             // back, and check what was read.
 354                         if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) {
 355                             parser.put_back(c);
 356                         }
 357
 358                         let (word, span) = parser.word();
 359                         check_word(cx, valid_idents, word, span);
 360                         parser.advance_begin();
 361                     },
 362                 }
 363
 364             },
 365             None => break,
 366         }
 367     }
 368
 369     Ok(())
 370 }
 371
 372 fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
 373     /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is
 374     /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
 375     fn is_camel_case(s: &str) -> bool {
 376         if s.starts_with(|c: char| c.is_digit(10)) {
 377             return false;
 378         }
 379
 380         let s = if s.ends_with('s') {
 381             &s[..s.len() - 1]
 382         } else {
 383             s
 384         };
 385
 386         s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
 387         s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
 388     }
 389
 390     fn has_underscore(s: &str) -> bool {
 391         s != "_" && !s.contains("\\_") && s.contains('_')
 392     }
 393
 394     // Trim punctuation as in `some comment (see foo::bar).`
 395     //                                                   ^^
 396     // Or even as in `_foo bar_` which is emphasized.
 397     let word = word.trim_matches(|c: char| !c.is_alphanumeric());
 398
 399     if valid_idents.iter().any(|i| i == word) {
 400         return;
 401     }
 402
 403     if has_underscore(word) || word.contains("::") || is_camel_case(word) {
 404         span_lint(cx,
 405                   DOC_MARKDOWN,
 406                   span,
 407                   &format!("you should put `{}` between ticks in the documentation", word));
 408     }
 409 }