clippy_lints/src/doc.rs

   1 // Copyright 2014-2018 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution.
   3 //
   4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   7 // option. This file may not be copied, modified, or distributed
   8 // except according to those terms.
   9
  10 use crate::utils::span_lint;
  11 use itertools::Itertools;
  12 use pulldown_cmark;
  13 use rustc::lint::{EarlyContext, EarlyLintPass, LintArray, LintPass};
  14 use rustc::{declare_tool_lint, lint_array};
  15 use rustc_data_structures::fx::FxHashSet;
  16 use syntax::ast;
  17 use syntax::source_map::{BytePos, Span};
  18 use syntax_pos::Pos;
  19 use url::Url;
  20
/// **What it does:** Checks for the presence of `_`, `::` or camel-case words
/// outside ticks in documentation.
///
/// **Why is this bad?** *Rustdoc* supports markdown formatting; `_`, `::` and
/// camel-case probably indicate some code which should be included between
/// ticks. `_` can also be used for emphasis in markdown, and this lint tries
/// to take that into account.
///
/// **Known problems:** Lots of bad docs won't be fixed, what the lint checks
/// for is limited, and there are still false positives.
///
/// **Examples:**
/// ```rust
/// /// Do something with the foo_bar parameter. See also
/// /// that::other::module::foo.
/// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
/// fn doit(foo_bar) { .. }
/// ```
declare_clippy_lint! {
    pub DOC_MARKDOWN,
    pedantic,
    "presence of `_`, `::` or camel-case outside backticks in documentation"
}
  44
/// Lint pass that reports `DOC_MARKDOWN` findings in doc comments.
#[derive(Clone)]
pub struct Doc {
    /// Words that are skipped without being linted when they appear in
    /// documentation text.
    valid_idents: FxHashSet<String>,
}

impl Doc {
    /// Creates the pass with the given set of words that must never be
    /// reported.
    pub fn new(valid_idents: FxHashSet<String>) -> Self {
        Self { valid_idents }
    }
}
  55
  56 impl LintPass for Doc {
  57     fn get_lints(&self) -> LintArray {
  58         lint_array![DOC_MARKDOWN]
  59     }
  60 }
  61
  62 impl EarlyLintPass for Doc {
  63     fn check_crate(&mut self, cx: &EarlyContext<'_>, krate: &ast::Crate) {
  64         check_attrs(cx, &self.valid_idents, &krate.attrs);
  65     }
  66
  67     fn check_item(&mut self, cx: &EarlyContext<'_>, item: &ast::Item) {
  68         check_attrs(cx, &self.valid_idents, &item.attrs);
  69     }
  70 }
  71
/// Wrapper around `pulldown_cmark::Parser` whose `Iterator` implementation
/// pairs every event with the parser offset read just before the event was
/// produced.
struct Parser<'a> {
    /// The wrapped markdown parser.
    parser: pulldown_cmark::Parser<'a>,
}

impl<'a> Parser<'a> {
    /// Wraps an existing markdown parser.
    fn new(parser: pulldown_cmark::Parser<'a>) -> Self {
        Self { parser }
    }
}
  81
  82 impl<'a> Iterator for Parser<'a> {
  83     type Item = (usize, pulldown_cmark::Event<'a>);
  84
  85     fn next(&mut self) -> Option<Self::Item> {
  86         let offset = self.parser.get_offset();
  87         self.parser.next().map(|event| (offset, event))
  88     }
  89 }
  90
  91 /// Cleanup documentation decoration (`///` and such).
  92 ///
  93 /// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
  94 /// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we
  95 /// need to keep track of
  96 /// the spans but this function is inspired from the later.
  97 #[allow(clippy::cast_possible_truncation)]
  98 pub fn strip_doc_comment_decoration(comment: &str, span: Span) -> (String, Vec<(usize, Span)>) {
  99     // one-line comments lose their prefix
 100     const ONELINERS: &[&str] = &["///!", "///", "//!", "//"];
 101     for prefix in ONELINERS {
 102         if comment.starts_with(*prefix) {
 103             let doc = &comment[prefix.len()..];
 104             let mut doc = doc.to_owned();
 105             doc.push('\n');
 106             return (
 107                 doc.to_owned(),
 108                 vec![(doc.len(), span.with_lo(span.lo() + BytePos(prefix.len() as u32)))],
 109             );
 110         }
 111     }
 112
 113     if comment.starts_with("/*") {
 114         let doc = &comment[3..comment.len() - 2];
 115         let mut sizes = vec![];
 116         let mut contains_initial_stars = false;
 117         for line in doc.lines() {
 118             let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
 119             debug_assert_eq!(offset as u32 as usize, offset);
 120             contains_initial_stars |= line.trim_start().starts_with('*');
 121             // +1 for the newline
 122             sizes.push((line.len() + 1, span.with_lo(span.lo() + BytePos(offset as u32))));
 123         }
 124         if !contains_initial_stars {
 125             return (doc.to_string(), sizes);
 126         }
 127         // remove the initial '*'s if any
 128         let mut no_stars = String::with_capacity(doc.len());
 129         for line in doc.lines() {
 130             let mut chars = line.chars();
 131             while let Some(c) = chars.next() {
 132                 if c.is_whitespace() {
 133                     no_stars.push(c);
 134                 } else {
 135                     no_stars.push(if c == '*' { ' ' } else { c });
 136                     break;
 137                 }
 138             }
 139             no_stars.push_str(chars.as_str());
 140             no_stars.push('\n');
 141         }
 142         return (no_stars, sizes);
 143     }
 144
 145     panic!("not a doc-comment: {}", comment);
 146 }
 147
 148 pub fn check_attrs<'a>(cx: &EarlyContext<'_>, valid_idents: &FxHashSet<String>, attrs: &'a [ast::Attribute]) {
 149     let mut doc = String::new();
 150     let mut spans = vec![];
 151
 152     for attr in attrs {
 153         if attr.is_sugared_doc {
 154             if let Some(ref current) = attr.value_str() {
 155                 let current = current.to_string();
 156                 let (current, current_spans) = strip_doc_comment_decoration(&current, attr.span);
 157                 spans.extend_from_slice(&current_spans);
 158                 doc.push_str(&current);
 159             }
 160         } else if attr.name() == "doc" {
 161             // ignore mix of sugared and non-sugared doc
 162             return;
 163         }
 164     }
 165
 166     let mut current = 0;
 167     for &mut (ref mut offset, _) in &mut spans {
 168         let offset_copy = *offset;
 169         *offset = current;
 170         current += offset_copy;
 171     }
 172
 173     if !doc.is_empty() {
 174         let parser = Parser::new(pulldown_cmark::Parser::new(&doc));
 175         let parser = parser.coalesce(|x, y| {
 176             use pulldown_cmark::Event::*;
 177
 178             let x_offset = x.0;
 179             let y_offset = y.0;
 180
 181             match (x.1, y.1) {
 182                 (Text(x), Text(y)) => {
 183                     let mut x = x.into_owned();
 184                     x.push_str(&y);
 185                     Ok((x_offset, Text(x.into())))
 186                 },
 187                 (x, y) => Err(((x_offset, x), (y_offset, y))),
 188             }
 189         });
 190         check_doc(cx, valid_idents, parser, &spans);
 191     }
 192 }
 193
 194 fn check_doc<'a, Events: Iterator<Item = (usize, pulldown_cmark::Event<'a>)>>(
 195     cx: &EarlyContext<'_>,
 196     valid_idents: &FxHashSet<String>,
 197     docs: Events,
 198     spans: &[(usize, Span)],
 199 ) {
 200     use pulldown_cmark::Event::*;
 201     use pulldown_cmark::Tag::*;
 202
 203     let mut in_code = false;
 204     let mut in_link = None;
 205
 206     for (offset, event) in docs {
 207         match event {
 208             Start(CodeBlock(_)) | Start(Code) => in_code = true,
 209             End(CodeBlock(_)) | End(Code) => in_code = false,
 210             Start(Link(link, _)) => in_link = Some(link),
 211             End(Link(_, _)) => in_link = None,
 212             Start(_tag) | End(_tag) => (),         // We don't care about other tags
 213             Html(_html) | InlineHtml(_html) => (), // HTML is weird, just ignore it
 214             SoftBreak | HardBreak => (),
 215             FootnoteReference(text) | Text(text) => {
 216                 if Some(&text) == in_link.as_ref() {
 217                     // Probably a link of the form `<http://example.com>`
 218                     // Which are represented as a link to "http://example.com" with
 219                     // text "http://example.com" by pulldown-cmark
 220                     continue;
 221                 }
 222
 223                 if !in_code {
 224                     let index = match spans.binary_search_by(|c| c.0.cmp(&offset)) {
 225                         Ok(o) => o,
 226                         Err(e) => e - 1,
 227                     };
 228
 229                     let (begin, span) = spans[index];
 230
 231                     // Adjust for the beginning of the current `Event`
 232                     let span = span.with_lo(span.lo() + BytePos::from_usize(offset - begin));
 233
 234                     check_text(cx, valid_idents, &text, span);
 235                 }
 236             },
 237         }
 238     }
 239 }
 240
 241 fn check_text(cx: &EarlyContext<'_>, valid_idents: &FxHashSet<String>, text: &str, span: Span) {
 242     for word in text.split(|c: char| c.is_whitespace() || c == '\'') {
 243         // Trim punctuation as in `some comment (see foo::bar).`
 244         //                                                   ^^
 245         // Or even as in `_foo bar_` which is emphasized.
 246         let word = word.trim_matches(|c: char| !c.is_alphanumeric());
 247
 248         if valid_idents.contains(word) {
 249             continue;
 250         }
 251
 252         // Adjust for the current word
 253         let offset = word.as_ptr() as usize - text.as_ptr() as usize;
 254         let span = Span::new(
 255             span.lo() + BytePos::from_usize(offset),
 256             span.lo() + BytePos::from_usize(offset + word.len()),
 257             span.ctxt(),
 258         );
 259
 260         check_word(cx, word, span);
 261     }
 262 }
 263
 264 fn check_word(cx: &EarlyContext<'_>, word: &str, span: Span) {
 265     /// Checks if a string is camel-case, ie. contains at least two uppercase
 266     /// letter (`Clippy` is
 267     /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded
 268     /// (`IDs` is ok).
 269     fn is_camel_case(s: &str) -> bool {
 270         if s.starts_with(|c: char| c.is_digit(10)) {
 271             return false;
 272         }
 273
 274         let s = if s.ends_with('s') { &s[..s.len() - 1] } else { s };
 275
 276         s.chars().all(char::is_alphanumeric)
 277             && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1
 278             && s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
 279     }
 280
 281     fn has_underscore(s: &str) -> bool {
 282         s != "_" && !s.contains("\\_") && s.contains('_')
 283     }
 284
 285     fn has_hyphen(s: &str) -> bool {
 286         s != "-" && s.contains('-')
 287     }
 288
 289     if let Ok(url) = Url::parse(word) {
 290         // try to get around the fact that `foo::bar` parses as a valid URL
 291         if !url.cannot_be_a_base() {
 292             span_lint(
 293                 cx,
 294                 DOC_MARKDOWN,
 295                 span,
 296                 "you should put bare URLs between `<`/`>` or make a proper Markdown link",
 297             );
 298
 299             return;
 300         }
 301     }
 302
 303     // We assume that mixed-case words are not meant to be put inside bacticks. (Issue #2343)
 304     if has_underscore(word) && has_hyphen(word) {
 305         return;
 306     }
 307
 308     if has_underscore(word) || word.contains("::") || is_camel_case(word) {
 309         span_lint(
 310             cx,
 311             DOC_MARKDOWN,
 312             span,
 313             &format!("you should put `{}` between ticks in the documentation", word),
 314         );
 315     }
 316 }