X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=clippy_lints%2Fsrc%2Fdoc.rs;h=6dbf2ea959e6a667f1be86e3a52079e48a3bf570;hb=93c48a0977927fc3678600d0d79ef43e8f30761f;hp=fd4640c21584e399c3d99b38e64afa708a977772;hpb=ac0bb4126c835ea89313cbb4f534c37bc14a2694;p=rust.git diff --git a/clippy_lints/src/doc.rs b/clippy_lints/src/doc.rs index fd4640c2158..6dbf2ea959e 100644 --- a/clippy_lints/src/doc.rs +++ b/clippy_lints/src/doc.rs @@ -1,26 +1,33 @@ +use itertools::Itertools; +use pulldown_cmark; use rustc::lint::*; use syntax::ast; use syntax::codemap::{Span, BytePos}; +use syntax_pos::Pos; use utils::span_lint; -/// **What it does:** This lint checks for the presence of `_`, `::` or camel-case words outside -/// ticks in documentation. +/// **What it does:** Checks for the presence of `_`, `::` or camel-case words +/// outside ticks in documentation. /// -/// **Why is this bad?** *Rustdoc* supports markdown formatting, `_`, `::` and camel-case probably -/// indicates some code which should be included between ticks. `_` can also be used for empasis in -/// markdown, this lint tries to consider that. +/// **Why is this bad?** *Rustdoc* supports markdown formatting, `_`, `::` and +/// camel-case probably indicates some code which should be included between +/// ticks. `_` can also be used for empasis in markdown, this lint tries to +/// consider that. /// -/// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks for is limited. +/// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks +/// for is limited, and there are still false positives. /// /// **Examples:** /// ```rust -/// /// Do something with the foo_bar parameter. See also that::other::module::foo. +/// /// Do something with the foo_bar parameter. See also +/// that::other::module::foo. /// // ^ `foo_bar` and `that::other::module::foo` should be ticked. /// fn doit(foo_bar) { .. } /// ``` declare_lint! { - pub DOC_MARKDOWN, Warn, - "checks for the presence of `_`, `::` or camel-case outside ticks in documentation" + pub DOC_MARKDOWN, + Warn, + "presence of `_`, `::` or camel-case outside backticks in documentation" } #[derive(Clone)] @@ -50,197 +57,217 @@ fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) { } } -pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) { - let mut docs = vec![]; +struct Parser<'a> { + parser: pulldown_cmark::Parser<'a>, +} - let mut in_multiline = false; - for attr in attrs { - if attr.node.is_sugared_doc { - if let ast::MetaItemKind::NameValue(_, ref doc) = attr.node.value.node { - if let ast::LitKind::Str(ref doc, _) = doc.node { - // doc comments start with `///` or `//!` - let real_doc = &doc[3..]; - let mut span = attr.span; - span.lo = span.lo + BytePos(3); - - // check for multiline code blocks - if real_doc.trim_left().starts_with("```") { - in_multiline = !in_multiline; - } - if !in_multiline { - docs.push((real_doc, span)); - } - } - } - } +impl<'a> Parser<'a> { + fn new(parser: pulldown_cmark::Parser<'a>) -> Parser<'a> { + Self { parser: parser } } +} + +impl<'a> Iterator for Parser<'a> { + type Item = (usize, pulldown_cmark::Event<'a>); - for (doc, span) in docs { - let _ = check_doc(cx, valid_idents, doc, span); + fn next(&mut self) -> Option { + let offset = self.parser.get_offset(); + self.parser.next().map(|event| (offset, event)) } } -#[allow(while_let_loop)] // #362 -pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], doc: &str, span: Span) -> Result<(), ()> { - // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context. - // There really is no markdown specification that would disambiguate this properly. This is - // what GitHub and Rustdoc do: - // - // foo_bar test_quz → foo_bar test_quz - // foo_bar_baz → foo_bar_baz (note that the “official” spec says this should be emphasized) - // _foo bar_ test_quz_ → foo bar test_quz_ - // \_foo bar\_ → _foo bar_ - // (_baz_) → (baz) - // foo _ bar _ baz → foo _ bar _ baz - - /// Character that can appear in a path - fn is_path_char(c: char) -> bool { - match c { - t if t.is_alphanumeric() => true, - ':' | '_' => true, - _ => false, +/// Cleanup documentation decoration (`///` and such). +/// +/// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or +/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we +/// need to keep track of +/// the spans but this function is inspired from the later. +#[allow(cast_possible_truncation)] +pub fn strip_doc_comment_decoration(comment: &str, span: Span) -> (String, Vec<(usize, Span)>) { + // one-line comments lose their prefix + const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"]; + for prefix in ONELINERS { + if comment.starts_with(*prefix) { + let doc = &comment[prefix.len()..]; + let mut doc = doc.to_owned(); + doc.push('\n'); + return ( + doc.to_owned(), + vec![ + ( + doc.len(), + Span { + lo: span.lo + BytePos(prefix.len() as u32), + ..span + } + ), + ], + ); } } - #[derive(Clone, Debug)] - struct Parser<'a> { - link: bool, - line: &'a str, - span: Span, - current_word_begin: usize, - new_line: bool, - pos: usize, - } - - impl<'a> Parser<'a> { - fn advance_begin(&mut self) { - self.current_word_begin = self.pos; + if comment.starts_with("/*") { + let doc = &comment[3..comment.len() - 2]; + let mut sizes = vec![]; + let mut contains_initial_stars = false; + for line in doc.lines() { + let offset = line.as_ptr() as usize - comment.as_ptr() as usize; + debug_assert_eq!(offset as u32 as usize, offset); + contains_initial_stars |= line.trim_left().starts_with('*'); + // +1 for the newline + sizes.push(( + line.len() + 1, + Span { + lo: span.lo + BytePos(offset as u32), + ..span + }, + )); } - - fn peek(&self) -> Option { - self.line[self.pos..].chars().next() + if !contains_initial_stars { + return (doc.to_string(), sizes); } - - fn jump_to(&mut self, n: char) -> Result<(), ()> { - while let Some(c) = self.next() { - if c == n { - self.advance_begin(); - return Ok(()); + // remove the initial '*'s if any + let mut no_stars = String::with_capacity(doc.len()); + for line in doc.lines() { + let mut chars = line.chars(); + while let Some(c) = chars.next() { + if c.is_whitespace() { + no_stars.push(c); + } else { + no_stars.push(if c == '*' { ' ' } else { c }); + break; } } - - return Err(()); - } - - fn put_back(&mut self, c: char) { - self.pos -= c.len_utf8(); + no_stars.push_str(chars.as_str()); + no_stars.push('\n'); } + return (no_stars, sizes); + } - #[allow(cast_possible_truncation)] - fn word(&self) -> (&'a str, Span) { - let begin = self.current_word_begin; - let end = self.pos; - - debug_assert_eq!(end as u32 as usize, end); - debug_assert_eq!(begin as u32 as usize, begin); + panic!("not a doc-comment: {}", comment); +} - let mut span = self.span; - span.hi = span.lo + BytePos(end as u32); - span.lo = span.lo + BytePos(begin as u32); +pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) { + let mut doc = String::new(); + let mut spans = vec![]; - (&self.line[begin..end], span) + for attr in attrs { + if attr.is_sugared_doc { + if let Some(ref current) = attr.value_str() { + let current = current.to_string(); + let (current, current_spans) = strip_doc_comment_decoration(¤t, attr.span); + spans.extend_from_slice(¤t_spans); + doc.push_str(¤t); + } + } else if let Some(name) = attr.name() { + // ignore mix of sugared and non-sugared doc + if name == "doc" { + return; + } } } - impl<'a> Iterator for Parser<'a> { - type Item = char; - - fn next(&mut self) -> Option { - let mut chars = self.line[self.pos..].chars(); - let c = chars.next(); + let mut current = 0; + for &mut (ref mut offset, _) in &mut spans { + let offset_copy = *offset; + *offset = current; + current += offset_copy; + } - if let Some(c) = c { - self.pos += c.len_utf8(); - } else { - // TODO: new line + if !doc.is_empty() { + let parser = Parser::new(pulldown_cmark::Parser::new(&doc)); + let parser = parser.coalesce(|x, y| { + use pulldown_cmark::Event::*; + + let x_offset = x.0; + let y_offset = y.0; + + match (x.1, y.1) { + (Text(x), Text(y)) => { + let mut x = x.into_owned(); + x.push_str(&y); + Ok((x_offset, Text(x.into()))) + }, + (x, y) => Err(((x_offset, x), (y_offset, y))), } + }); + check_doc(cx, valid_idents, parser, &spans); + } +} - c +fn check_doc<'a, Events: Iterator)>>( + cx: &EarlyContext, + valid_idents: &[String], + docs: Events, + spans: &[(usize, Span)], +) { + use pulldown_cmark::Event::*; + use pulldown_cmark::Tag::*; + + let mut in_code = false; + + for (offset, event) in docs { + match event { + Start(CodeBlock(_)) | + Start(Code) => in_code = true, + End(CodeBlock(_)) | + End(Code) => in_code = false, + Start(_tag) | End(_tag) => (), // We don't care about other tags + Html(_html) | + InlineHtml(_html) => (), // HTML is weird, just ignore it + SoftBreak => (), + HardBreak => (), + FootnoteReference(text) | + Text(text) => { + if !in_code { + let index = match spans.binary_search_by(|c| c.0.cmp(&offset)) { + Ok(o) => o, + Err(e) => e - 1, + }; + + let (begin, span) = spans[index]; + + // Adjust for the begining of the current `Event` + let span = Span { + lo: span.lo + BytePos::from_usize(offset - begin), + ..span + }; + + check_text(cx, valid_idents, &text, span); + } + }, } } +} - let mut parser = Parser { - link: false, - line: doc, - span: span, - current_word_begin: 0, - new_line: true, - pos: 0, - }; - - loop { - match parser.next() { - Some(c) => { - match c { - '#' if new_line => { // don’t warn on titles - try!(parser.jump_to('\n')); - } - '`' => { - try!(parser.jump_to('`')); - } - '[' => { - // Check for a reference definition `[foo]:` at the beginning of a line - let mut link = true; - if parser.new_line { - let mut lookup_parser = parser.clone(); - if let Some(_) = lookup_parser.find(|&c| c == ']') { - if let Some(':') = lookup_parser.next() { - try!(lookup_parser.jump_to(')')); - parser = lookup_parser; - link = false; - } - } - } +fn check_text(cx: &EarlyContext, valid_idents: &[String], text: &str, span: Span) { + for word in text.split_whitespace() { + // Trim punctuation as in `some comment (see foo::bar).` + // ^^ + // Or even as in `_foo bar_` which is emphasized. + let word = word.trim_matches(|c: char| !c.is_alphanumeric()); - parser.advance_begin(); - parser.link = link; - } - ']' if parser.link => { - parser.link = false; - - match parser.peek() { - Some('(') => try!(parser.jump_to(')')), - Some('[') => try!(parser.jump_to(']')), - Some(_) => continue, - None => return Err(()), - } - } - c if !is_path_char(c) => { - parser.advance_begin(); - } - _ => { - if let Some(c) = parser.find(|&c| !is_path_char(c)) { - parser.put_back(c); - } + if valid_idents.iter().any(|i| i == word) { + continue; + } - let (word, span) = parser.word(); - check_word(cx, valid_idents, word, span); - parser.advance_begin(); - } - } + // Adjust for the current word + let offset = word.as_ptr() as usize - text.as_ptr() as usize; + let span = Span { + lo: span.lo + BytePos::from_usize(offset), + hi: span.lo + BytePos::from_usize(offset + word.len()), + ..span + }; - parser.new_line = c == '\n' || (parser.new_line && c.is_whitespace()); - } - None => break, - } + check_word(cx, word, span); } - - Ok(()) } -fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) { - /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is - /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok). +fn check_word(cx: &EarlyContext, word: &str, span: Span) { + /// Checks if a string is camel-case, ie. contains at least two uppercase + /// letter (`Clippy` is + /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded + /// (`IDs` is ok). fn is_camel_case(s: &str) -> bool { if s.starts_with(|c: char| c.is_digit(10)) { return false; @@ -252,28 +279,20 @@ fn is_camel_case(s: &str) -> bool { s }; - s.chars().all(char::is_alphanumeric) && - s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 && - s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0 + s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 && + s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0 } fn has_underscore(s: &str) -> bool { s != "_" && !s.contains("\\_") && s.contains('_') } - // Trim punctuation as in `some comment (see foo::bar).` - // ^^ - // Or even as in `_foo bar_` which is emphasized. - let word = word.trim_matches(|c: char| !c.is_alphanumeric()); - - if valid_idents.iter().any(|i| i == word) { - return; - } - if has_underscore(word) || word.contains("::") || is_camel_case(word) { - span_lint(cx, - DOC_MARKDOWN, - span, - &format!("you should put `{}` between ticks in the documentation", word)); + span_lint( + cx, + DOC_MARKDOWN, + span, + &format!("you should put `{}` between ticks in the documentation", word), + ); } }