X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=clippy_lints%2Fsrc%2Fdoc.rs;h=6dbf2ea959e6a667f1be86e3a52079e48a3bf570;hb=93c48a0977927fc3678600d0d79ef43e8f30761f;hp=cf32c1731fa3124a55c6544c8b47036838e00361;hpb=bf227f4729ecc63147bacaf05d161f3819d13d7e;p=rust.git diff --git a/clippy_lints/src/doc.rs b/clippy_lints/src/doc.rs index cf32c1731fa..6dbf2ea959e 100644 --- a/clippy_lints/src/doc.rs +++ b/clippy_lints/src/doc.rs @@ -1,26 +1,33 @@ +use itertools::Itertools; +use pulldown_cmark; use rustc::lint::*; use syntax::ast; use syntax::codemap::{Span, BytePos}; +use syntax_pos::Pos; use utils::span_lint; -/// **What it does:** This lint checks for the presence of `_`, `::` or camel-case words outside -/// ticks in documentation. +/// **What it does:** Checks for the presence of `_`, `::` or camel-case words +/// outside ticks in documentation. /// -/// **Why is this bad?** *Rustdoc* supports markdown formatting, `_`, `::` and camel-case probably -/// indicates some code which should be included between ticks. `_` can also be used for empasis in -/// markdown, this lint tries to consider that. +/// **Why is this bad?** *Rustdoc* supports markdown formatting, `_`, `::` and +/// camel-case probably indicates some code which should be included between +/// ticks. `_` can also be used for empasis in markdown, this lint tries to +/// consider that. /// -/// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks for is limited. +/// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks +/// for is limited, and there are still false positives. /// /// **Examples:** /// ```rust -/// /// Do something with the foo_bar parameter. See also that::other::module::foo. +/// /// Do something with the foo_bar parameter. See also +/// that::other::module::foo. /// // ^ `foo_bar` and `that::other::module::foo` should be ticked. /// fn doit(foo_bar) { .. } /// ``` declare_lint! { - pub DOC_MARKDOWN, Warn, - "checks for the presence of `_`, `::` or camel-case outside ticks in documentation" + pub DOC_MARKDOWN, + Warn, + "presence of `_`, `::` or camel-case outside backticks in documentation" } #[derive(Clone)] @@ -50,153 +57,217 @@ fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) { } } -pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) { - let mut in_multiline = false; - for attr in attrs { - if attr.node.is_sugared_doc { - if let ast::MetaItemKind::NameValue(_, ref doc) = attr.node.value.node { - if let ast::LitKind::Str(ref doc, _) = doc.node { - // doc comments start with `///` or `//!` - let real_doc = &doc[3..]; - let mut span = attr.span; - span.lo = span.lo + BytePos(3); - - // check for multiline code blocks - if real_doc.trim_left().starts_with("```") { - in_multiline = !in_multiline; - } - if !in_multiline { - check_doc(cx, valid_idents, real_doc, span); - } - } - } - } +struct Parser<'a> { + parser: pulldown_cmark::Parser<'a>, +} + +impl<'a> Parser<'a> { + fn new(parser: pulldown_cmark::Parser<'a>) -> Parser<'a> { + Self { parser: parser } } } -macro_rules! jump_to { - // Get the next character’s first byte UTF-8 friendlyly. - (@next_char, $chars: expr, $len: expr) => {{ - if let Some(&(pos, _)) = $chars.peek() { - pos - } else { - $len +impl<'a> Iterator for Parser<'a> { + type Item = (usize, pulldown_cmark::Event<'a>); + + fn next(&mut self) -> Option { + let offset = self.parser.get_offset(); + self.parser.next().map(|event| (offset, event)) + } +} + +/// Cleanup documentation decoration (`///` and such). +/// +/// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or +/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we +/// need to keep track of +/// the spans but this function is inspired from the later. +#[allow(cast_possible_truncation)] +pub fn strip_doc_comment_decoration(comment: &str, span: Span) -> (String, Vec<(usize, Span)>) { + // one-line comments lose their prefix + const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"]; + for prefix in ONELINERS { + if comment.starts_with(*prefix) { + let doc = &comment[prefix.len()..]; + let mut doc = doc.to_owned(); + doc.push('\n'); + return ( + doc.to_owned(), + vec![ + ( + doc.len(), + Span { + lo: span.lo + BytePos(prefix.len() as u32), + ..span + } + ), + ], + ); } - }}; + } - // Jump to the next `$c`. If no such character is found, give up. - ($chars: expr, $c: expr, $len: expr) => {{ - if $chars.find(|&(_, c)| c == $c).is_some() { - jump_to!(@next_char, $chars, $len) + if comment.starts_with("/*") { + let doc = &comment[3..comment.len() - 2]; + let mut sizes = vec![]; + let mut contains_initial_stars = false; + for line in doc.lines() { + let offset = line.as_ptr() as usize - comment.as_ptr() as usize; + debug_assert_eq!(offset as u32 as usize, offset); + contains_initial_stars |= line.trim_left().starts_with('*'); + // +1 for the newline + sizes.push(( + line.len() + 1, + Span { + lo: span.lo + BytePos(offset as u32), + ..span + }, + )); + } + if !contains_initial_stars { + return (doc.to_string(), sizes); } - else { - return; + // remove the initial '*'s if any + let mut no_stars = String::with_capacity(doc.len()); + for line in doc.lines() { + let mut chars = line.chars(); + while let Some(c) = chars.next() { + if c.is_whitespace() { + no_stars.push(c); + } else { + no_stars.push(if c == '*' { ' ' } else { c }); + break; + } + } + no_stars.push_str(chars.as_str()); + no_stars.push('\n'); } - }}; + return (no_stars, sizes); + } + + panic!("not a doc-comment: {}", comment); } -#[allow(while_let_loop)] // #362 -pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], doc: &str, span: Span) { - // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context. - // There really is no markdown specification that would disambiguate this properly. This is - // what GitHub and Rustdoc do: - // - // foo_bar test_quz → foo_bar test_quz - // foo_bar_baz → foo_bar_baz (note that the “official” spec says this should be emphasized) - // _foo bar_ test_quz_ → foo bar test_quz_ - // \_foo bar\_ → _foo bar_ - // (_baz_) → (baz) - // foo _ bar _ baz → foo _ bar _ baz - - /// Character that can appear in a word - fn is_word_char(c: char) -> bool { - match c { - t if t.is_alphanumeric() => true, - ':' | '_' => true, - _ => false, +pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) { + let mut doc = String::new(); + let mut spans = vec![]; + + for attr in attrs { + if attr.is_sugared_doc { + if let Some(ref current) = attr.value_str() { + let current = current.to_string(); + let (current, current_spans) = strip_doc_comment_decoration(¤t, attr.span); + spans.extend_from_slice(¤t_spans); + doc.push_str(¤t); + } + } else if let Some(name) = attr.name() { + // ignore mix of sugared and non-sugared doc + if name == "doc" { + return; + } } } - #[allow(cast_possible_truncation)] - fn word_span(mut span: Span, begin: usize, end: usize) -> Span { - debug_assert_eq!(end as u32 as usize, end); - debug_assert_eq!(begin as u32 as usize, begin); - span.hi = span.lo + BytePos(end as u32); - span.lo = span.lo + BytePos(begin as u32); - span - } - - let mut new_line = true; - let len = doc.len(); - let mut chars = doc.char_indices().peekable(); - let mut current_word_begin = 0; - loop { - match chars.next() { - Some((_, c)) => { - match c { - '#' if new_line => { // don’t warn on titles - current_word_begin = jump_to!(chars, '\n', len); - } - '`' => { - current_word_begin = jump_to!(chars, '`', len); - } - '[' => { - let end = jump_to!(chars, ']', len); - let link_text = &doc[current_word_begin + 1..end]; - let word_span = word_span(span, current_word_begin + 1, end + 1); - - match chars.peek() { - Some(&(_, c)) => { - // Trying to parse a link. Let’s ignore the link. - - // FIXME: how does markdown handles such link? - // https://en.wikipedia.org/w/index.php?title=) - match c { - '(' => { // inline link - current_word_begin = jump_to!(chars, ')', len); - check_doc(cx, valid_idents, link_text, word_span); - } - '[' => { // reference link - current_word_begin = jump_to!(chars, ']', len); - check_doc(cx, valid_idents, link_text, word_span); - } - ':' => { // reference link - current_word_begin = jump_to!(chars, '\n', len); - } - _ => { // automatic reference link - current_word_begin = jump_to!(@next_char, chars, len); - check_doc(cx, valid_idents, link_text, word_span); - } - } - } - None => return, - } - } - // anything that’s neither alphanumeric nor '_' is not part of an ident anyway - c if !c.is_alphanumeric() && c != '_' => { - current_word_begin = jump_to!(@next_char, chars, len); - } - _ => { - let end = match chars.find(|&(_, c)| !is_word_char(c)) { - Some((end, _)) => end, - None => len, - }; - let word_span = word_span(span, current_word_begin, end); - check_word(cx, valid_idents, &doc[current_word_begin..end], word_span); - current_word_begin = jump_to!(@next_char, chars, len); - } - } + let mut current = 0; + for &mut (ref mut offset, _) in &mut spans { + let offset_copy = *offset; + *offset = current; + current += offset_copy; + } + + if !doc.is_empty() { + let parser = Parser::new(pulldown_cmark::Parser::new(&doc)); + let parser = parser.coalesce(|x, y| { + use pulldown_cmark::Event::*; - new_line = c == '\n' || (new_line && c.is_whitespace()); + let x_offset = x.0; + let y_offset = y.0; + + match (x.1, y.1) { + (Text(x), Text(y)) => { + let mut x = x.into_owned(); + x.push_str(&y); + Ok((x_offset, Text(x.into()))) + }, + (x, y) => Err(((x_offset, x), (y_offset, y))), } - None => break, + }); + check_doc(cx, valid_idents, parser, &spans); + } +} + +fn check_doc<'a, Events: Iterator)>>( + cx: &EarlyContext, + valid_idents: &[String], + docs: Events, + spans: &[(usize, Span)], +) { + use pulldown_cmark::Event::*; + use pulldown_cmark::Tag::*; + + let mut in_code = false; + + for (offset, event) in docs { + match event { + Start(CodeBlock(_)) | + Start(Code) => in_code = true, + End(CodeBlock(_)) | + End(Code) => in_code = false, + Start(_tag) | End(_tag) => (), // We don't care about other tags + Html(_html) | + InlineHtml(_html) => (), // HTML is weird, just ignore it + SoftBreak => (), + HardBreak => (), + FootnoteReference(text) | + Text(text) => { + if !in_code { + let index = match spans.binary_search_by(|c| c.0.cmp(&offset)) { + Ok(o) => o, + Err(e) => e - 1, + }; + + let (begin, span) = spans[index]; + + // Adjust for the begining of the current `Event` + let span = Span { + lo: span.lo + BytePos::from_usize(offset - begin), + ..span + }; + + check_text(cx, valid_idents, &text, span); + } + }, + } + } +} + +fn check_text(cx: &EarlyContext, valid_idents: &[String], text: &str, span: Span) { + for word in text.split_whitespace() { + // Trim punctuation as in `some comment (see foo::bar).` + // ^^ + // Or even as in `_foo bar_` which is emphasized. + let word = word.trim_matches(|c: char| !c.is_alphanumeric()); + + if valid_idents.iter().any(|i| i == word) { + continue; } + + // Adjust for the current word + let offset = word.as_ptr() as usize - text.as_ptr() as usize; + let span = Span { + lo: span.lo + BytePos::from_usize(offset), + hi: span.lo + BytePos::from_usize(offset + word.len()), + ..span + }; + + check_word(cx, word, span); } } -fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) { - /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is - /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok). +fn check_word(cx: &EarlyContext, word: &str, span: Span) { + /// Checks if a string is camel-case, ie. contains at least two uppercase + /// letter (`Clippy` is + /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded + /// (`IDs` is ok). fn is_camel_case(s: &str) -> bool { if s.starts_with(|c: char| c.is_digit(10)) { return false; @@ -208,28 +279,20 @@ fn is_camel_case(s: &str) -> bool { s }; - s.chars().all(char::is_alphanumeric) && - s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 && - s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0 + s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 && + s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0 } fn has_underscore(s: &str) -> bool { s != "_" && !s.contains("\\_") && s.contains('_') } - // Trim punctuation as in `some comment (see foo::bar).` - // ^^ - // Or even as in `_foo bar_` which is emphasized. - let word = word.trim_matches(|c: char| !c.is_alphanumeric()); - - if valid_idents.iter().any(|i| i == word) { - return; - } - if has_underscore(word) || word.contains("::") || is_camel_case(word) { - span_lint(cx, - DOC_MARKDOWN, - span, - &format!("you should put `{}` between ticks in the documentation", word)); + span_lint( + cx, + DOC_MARKDOWN, + span, + &format!("you should put `{}` between ticks in the documentation", word), + ); } }