clippy_lints/src/doc.rs

   1 use itertools::Itertools;
   2 use pulldown_cmark;
   3 use rustc::lint::*;
   4 use syntax::ast;
   5 use syntax::codemap::{Span, BytePos};
   6 use syntax_pos::Pos;
   7 use utils::span_lint;
   8
/// **What it does:** Checks for the presence of `_`, `::` or camel-case words
/// outside ticks in documentation.
///
/// **Why is this bad?** *Rustdoc* supports markdown formatting, `_`, `::` and
/// camel-case probably indicate some code which should be included between
/// ticks. `_` can also be used for emphasis in markdown, this lint tries to
/// consider that.
///
/// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks
/// for is limited, and there are still false positives.
///
/// **Examples:**
/// ```rust
/// /// Do something with the foo_bar parameter. See also that::other::module::foo.
/// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
/// fn doit(foo_bar) { .. }
/// ```
declare_lint! {
    pub DOC_MARKDOWN,
    Warn,
    "presence of `_`, `::` or camel-case outside backticks in documentation"
}
  31
  32 #[derive(Clone)]
  33 pub struct Doc {
  34     valid_idents: Vec<String>,
  35 }
  36
  37 impl Doc {
  38     pub fn new(valid_idents: Vec<String>) -> Self {
  39         Doc { valid_idents: valid_idents }
  40     }
  41 }
  42
  43 impl LintPass for Doc {
  44     fn get_lints(&self) -> LintArray {
  45         lint_array![DOC_MARKDOWN]
  46     }
  47 }
  48
  49 impl EarlyLintPass for Doc {
  50     fn check_crate(&mut self, cx: &EarlyContext, krate: &ast::Crate) {
  51         check_attrs(cx, &self.valid_idents, &krate.attrs);
  52     }
  53
  54     fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) {
  55         check_attrs(cx, &self.valid_idents, &item.attrs);
  56     }
  57 }
  58
  59 struct Parser<'a> {
  60     parser: pulldown_cmark::Parser<'a>,
  61 }
  62
  63 impl<'a> Parser<'a> {
  64     fn new(parser: pulldown_cmark::Parser<'a>) -> Parser<'a> {
  65         Self { parser: parser }
  66     }
  67 }
  68
  69 impl<'a> Iterator for Parser<'a> {
  70     type Item = (usize, pulldown_cmark::Event<'a>);
  71
  72     fn next(&mut self) -> Option<Self::Item> {
  73         let offset = self.parser.get_offset();
  74         self.parser.next().map(|event| (offset, event))
  75     }
  76 }
  77
  78 /// Cleanup documentation decoration (`///` and such).
  79 ///
  80 /// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
  81 /// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we need to keep track of
  82 /// the spans but this function is inspired from the later.
  83 #[allow(cast_possible_truncation)]
  84 pub fn strip_doc_comment_decoration(comment: &str, span: Span) -> (String, Vec<(usize, Span)>) {
  85     // one-line comments lose their prefix
  86     const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
  87     for prefix in ONELINERS {
  88         if comment.starts_with(*prefix) {
  89             let doc = &comment[prefix.len()..];
  90             let mut doc = doc.to_owned();
  91             doc.push('\n');
  92             return (doc.to_owned(), vec![(doc.len(), Span { lo: span.lo + BytePos(prefix.len() as u32), ..span })]);
  93         }
  94     }
  95
  96     if comment.starts_with("/*") {
  97         let doc = &comment[3..comment.len() - 2];
  98         let mut sizes = vec![];
  99
 100         for line in doc.lines() {
 101             let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
 102             debug_assert_eq!(offset as u32 as usize, offset);
 103
 104             // +1 for the newline
 105             sizes.push((line.len() + 1, Span { lo: span.lo + BytePos(offset as u32), ..span }));
 106         }
 107
 108         return (doc.to_string(), sizes);
 109     }
 110
 111     panic!("not a doc-comment: {}", comment);
 112 }
 113
 114 pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
 115     let mut doc = String::new();
 116     let mut spans = vec![];
 117
 118     for attr in attrs {
 119         if attr.is_sugared_doc {
 120             if let Some(ref current) = attr.value_str() {
 121                 let current = current.to_string();
 122                 let (current, current_spans) = strip_doc_comment_decoration(&current, attr.span);
 123                 spans.extend_from_slice(&current_spans);
 124                 doc.push_str(&current);
 125             }
 126         } else if let Some(name) = attr.name() {
 127             // ignore mix of sugared and non-sugared doc
 128             if name == "doc" {
 129                 return;
 130             }
 131         }
 132     }
 133
 134     let mut current = 0;
 135     for &mut (ref mut offset, _) in &mut spans {
 136         let offset_copy = *offset;
 137         *offset = current;
 138         current += offset_copy;
 139     }
 140
 141     if !doc.is_empty() {
 142         let parser = Parser::new(pulldown_cmark::Parser::new(&doc));
 143         let parser = parser.coalesce(|x, y| {
 144             use pulldown_cmark::Event::*;
 145
 146             let x_offset = x.0;
 147             let y_offset = y.0;
 148
 149             match (x.1, y.1) {
 150                 (Text(x), Text(y)) => {
 151                     let mut x = x.into_owned();
 152                     x.push_str(&y);
 153                     Ok((x_offset, Text(x.into())))
 154                 },
 155                 (x, y) => Err(((x_offset, x), (y_offset, y))),
 156             }
 157         });
 158         check_doc(cx, valid_idents, parser, &spans);
 159     }
 160 }
 161
 162 fn check_doc<'a, Events: Iterator<Item = (usize, pulldown_cmark::Event<'a>)>>(
 163     cx: &EarlyContext,
 164     valid_idents: &[String],
 165     docs: Events,
 166     spans: &[(usize, Span)]
 167 ) {
 168     use pulldown_cmark::Event::*;
 169     use pulldown_cmark::Tag::*;
 170
 171     let mut in_code = false;
 172
 173     for (offset, event) in docs {
 174         match event {
 175             Start(CodeBlock(_)) |
 176             Start(Code) => in_code = true,
 177             End(CodeBlock(_)) |
 178             End(Code) => in_code = false,
 179             Start(_tag) | End(_tag) => (), // We don't care about other tags
 180             Html(_html) |
 181             InlineHtml(_html) => (), // HTML is weird, just ignore it
 182             SoftBreak => (),
 183             HardBreak => (),
 184             FootnoteReference(text) |
 185             Text(text) => {
 186                 if !in_code {
 187                     let index = match spans.binary_search_by(|c| c.0.cmp(&offset)) {
 188                         Ok(o) => o,
 189                         Err(e) => e - 1,
 190                     };
 191
 192                     let (begin, span) = spans[index];
 193
 194                     // Adjust for the begining of the current `Event`
 195                     let span = Span { lo: span.lo + BytePos::from_usize(offset - begin), ..span };
 196
 197                     check_text(cx, valid_idents, &text, span);
 198                 }
 199             },
 200         }
 201     }
 202 }
 203
 204 fn check_text(cx: &EarlyContext, valid_idents: &[String], text: &str, span: Span) {
 205     for word in text.split_whitespace() {
 206         // Trim punctuation as in `some comment (see foo::bar).`
 207         //                                                   ^^
 208         // Or even as in `_foo bar_` which is emphasized.
 209         let word = word.trim_matches(|c: char| !c.is_alphanumeric());
 210
 211         if valid_idents.iter().any(|i| i == word) {
 212             continue;
 213         }
 214
 215         // Adjust for the current word
 216         let offset = word.as_ptr() as usize - text.as_ptr() as usize;
 217         let span = Span {
 218             lo: span.lo + BytePos::from_usize(offset),
 219             hi: span.lo + BytePos::from_usize(offset + word.len()),
 220             ..span
 221         };
 222
 223         check_word(cx, word, span);
 224     }
 225 }
 226
 227 fn check_word(cx: &EarlyContext, word: &str, span: Span) {
 228     /// Checks if a string is camel-case, ie. contains at least two uppercase letter (`Clippy` is
 229     /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
 230     fn is_camel_case(s: &str) -> bool {
 231         if s.starts_with(|c: char| c.is_digit(10)) {
 232             return false;
 233         }
 234
 235         let s = if s.ends_with('s') {
 236             &s[..s.len() - 1]
 237         } else {
 238             s
 239         };
 240
 241         s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
 242         s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
 243     }
 244
 245     fn has_underscore(s: &str) -> bool {
 246         s != "_" && !s.contains("\\_") && s.contains('_')
 247     }
 248
 249     if has_underscore(word) || word.contains("::") || is_camel_case(word) {
 250         span_lint(cx,
 251                   DOC_MARKDOWN,
 252                   span,
 253                   &format!("you should put `{}` between ticks in the documentation", word));
 254     }
 255 }