X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=clippy_lints%2Fsrc%2Fdoc.rs;h=6dbf2ea959e6a667f1be86e3a52079e48a3bf570;hb=93c48a0977927fc3678600d0d79ef43e8f30761f;hp=4183632543b7a91fb2dc377b94c1d4a491bbf324;hpb=d61c7fc7470ba572624f14af46f519e24322112d;p=rust.git

diff --git a/clippy_lints/src/doc.rs b/clippy_lints/src/doc.rs
index 4183632543b..6dbf2ea959e 100644
--- a/clippy_lints/src/doc.rs
+++ b/clippy_lints/src/doc.rs
@@ -1,6 +1,9 @@
+use itertools::Itertools;
+use pulldown_cmark;
 use rustc::lint::*;
 use syntax::ast;
 use syntax::codemap::{Span, BytePos};
+use syntax_pos::Pos;
 use utils::span_lint;
 
 /// **What it does:** Checks for the presence of `_`, `::` or camel-case words
@@ -16,7 +19,8 @@
 ///
 /// **Examples:**
 /// ```rust
-/// /// Do something with the foo_bar parameter. See also that::other::module::foo.
+/// /// Do something with the foo_bar parameter. See also
+/// that::other::module::foo.
 /// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
 /// fn doit(foo_bar) { .. }
 /// ```
@@ -53,328 +57,217 @@ fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) {
     }
 }
 
+struct Parser<'a> {
+    parser: pulldown_cmark::Parser<'a>,
+}
+
+impl<'a> Parser<'a> {
+    fn new(parser: pulldown_cmark::Parser<'a>) -> Parser<'a> {
+        Self { parser: parser }
+    }
+}
+
+impl<'a> Iterator for Parser<'a> {
+    type Item = (usize, pulldown_cmark::Event<'a>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let offset = self.parser.get_offset();
+        self.parser.next().map(|event| (offset, event))
+    }
+}
+
 /// Cleanup documentation decoration (`///` and such).
 ///
 /// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
-/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we need to keep track of
-/// the span but this function is inspired from the later.
+/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we
+/// need to keep track of
+/// the spans but this function is inspired by the latter.
 #[allow(cast_possible_truncation)]
-pub fn strip_doc_comment_decoration((comment, span): (String, Span)) -> Vec<(String, Span)> {
+pub fn strip_doc_comment_decoration(comment: &str, span: Span) -> (String, Vec<(usize, Span)>) {
     // one-line comments lose their prefix
     const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
     for prefix in ONELINERS {
         if comment.starts_with(*prefix) {
-            return vec![(
-                comment[prefix.len()..].to_owned(),
-                Span { lo: span.lo + BytePos(prefix.len() as u32), ..span }
-            )];
+            let doc = &comment[prefix.len()..];
+            let mut doc = doc.to_owned();
+            doc.push('\n');
+            return (
+                doc.to_owned(),
+                vec![
+                    (
+                        doc.len(),
+                        Span {
+                            lo: span.lo + BytePos(prefix.len() as u32),
+                            ..span
+                        }
+                    ),
+                ],
+            );
         }
     }
 
     if comment.starts_with("/*") {
-        return comment[3..comment.len() - 2].lines().map(|line| {
+        let doc = &comment[3..comment.len() - 2];
+        let mut sizes = vec![];
+        let mut contains_initial_stars = false;
+        for line in doc.lines() {
             let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
             debug_assert_eq!(offset as u32 as usize, offset);
-
-            (
-                line.to_owned(),
+            contains_initial_stars |= line.trim_left().starts_with('*');
+            // +1 for the newline
+            sizes.push((
+                line.len() + 1,
                 Span {
                     lo: span.lo + BytePos(offset as u32),
                     ..span
+                },
+            ));
+        }
+        if !contains_initial_stars {
+            return (doc.to_string(), sizes);
+        }
+        // remove the initial '*'s if any
+        let mut no_stars = String::with_capacity(doc.len());
+        for line in doc.lines() {
+            let mut chars = line.chars();
+            while let Some(c) = chars.next() {
+                if c.is_whitespace() {
+                    no_stars.push(c);
+                } else {
+                    no_stars.push(if c == '*' { ' ' } else { c });
+                    break;
                 }
-            )
-        }).collect();
+            }
+            no_stars.push_str(chars.as_str());
+            no_stars.push('\n');
+        }
+        return (no_stars, sizes);
     }
 
     panic!("not a doc-comment: {}", comment);
 }
 
 pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
-    let mut docs = vec![];
+    let mut doc = String::new();
+    let mut spans = vec![];
 
     for attr in attrs {
         if attr.is_sugared_doc {
-            if let ast::MetaItemKind::NameValue(ref doc) = attr.value.node {
-                if let ast::LitKind::Str(ref doc, _) = doc.node {
-                    let doc = (*doc.as_str()).to_owned();
-                    docs.extend_from_slice(&strip_doc_comment_decoration((doc, attr.span)));
-                }
+            if let Some(ref current) = attr.value_str() {
+                let current = current.to_string();
+                let (current, current_spans) = strip_doc_comment_decoration(&current, attr.span);
+                spans.extend_from_slice(&current_spans);
+                doc.push_str(&current);
+            }
+        } else if let Some(name) = attr.name() {
+            // ignore mix of sugared and non-sugared doc
+            if name == "doc" {
+                return;
             }
         }
     }
 
-    if !docs.is_empty() {
-        let _ = check_doc(cx, valid_idents, &docs);
-    }
-}
-
-#[allow(while_let_loop)] // #362
-fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(String, Span)]) -> Result<(), ()> {
-    // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
-    // There really is no markdown specification that would disambiguate this properly. This is
-    // what GitHub and Rustdoc do:
-    //
-    // foo_bar test_quz → foo_bar test_quz
-    // foo_bar_baz → foo_bar_baz (note that the “official” spec says this should be emphasized)
-    // _foo bar_ test_quz_ → foo bar test_quz_
-    // \_foo bar\_ → _foo bar_
-    // (_baz_) → (baz)
-    // foo _ bar _ baz → foo _ bar _ baz
-
-    /// Character that can appear in a path
-    fn is_path_char(c: char) -> bool {
-        match c {
-            t if t.is_alphanumeric() => true,
-            ':' | '_' => true,
-            _ => false,
-        }
-    }
-
-    #[derive(Clone, Debug)]
-    /// This type is used to iterate through the documentation characters, keeping the span at the
-    /// same time.
-    struct Parser<'a> {
-        /// First byte of the current potential match
-        current_word_begin: usize,
-        /// List of lines and their associated span
-        docs: &'a [(String, Span)],
-        /// Index of the current line we are parsing
-        line: usize,
-        /// Whether we are in a link
-        link: bool,
-        /// Whether we are at the beginning of a line
-        new_line: bool,
-        /// Whether we were to the end of a line last time `next` was called
-        reset: bool,
-        /// The position of the current character within the current line
-        pos: usize,
+    let mut current = 0;
+    for &mut (ref mut offset, _) in &mut spans {
+        let offset_copy = *offset;
+        *offset = current;
+        current += offset_copy;
     }
 
-    impl<'a> Parser<'a> {
-        fn advance_begin(&mut self) {
-            self.current_word_begin = self.pos;
-        }
-
-        fn line(&self) -> (&'a str, Span) {
-            let (ref doc, span) = self.docs[self.line];
-            (doc, span)
-        }
-
-        fn peek(&self) -> Option<char> {
-            self.line().0[self.pos..].chars().next()
-        }
-
-        #[allow(while_let_on_iterator)] // borrowck complains about for
-        fn jump_to(&mut self, n: char) -> Result<bool, ()> {
-            while let Some((new_line, c)) = self.next() {
-                if c == n {
-                    self.advance_begin();
-                    return Ok(new_line);
-                }
+    if !doc.is_empty() {
+        let parser = Parser::new(pulldown_cmark::Parser::new(&doc));
+        let parser = parser.coalesce(|x, y| {
+            use pulldown_cmark::Event::*;
+
+            let x_offset = x.0;
+            let y_offset = y.0;
+
+            match (x.1, y.1) {
+                (Text(x), Text(y)) => {
+                    let mut x = x.into_owned();
+                    x.push_str(&y);
+                    Ok((x_offset, Text(x.into())))
+                },
+                (x, y) => Err(((x_offset, x), (y_offset, y))),
             }
-
-            Err(())
-        }
-
-        fn next_line(&mut self) {
-            self.pos = 0;
-            self.current_word_begin = 0;
-            self.line += 1;
-            self.new_line = true;
-        }
-
-        fn put_back(&mut self, c: char) {
-            self.pos -= c.len_utf8();
-        }
-
-        #[allow(cast_possible_truncation)]
-        fn word(&self) -> (&'a str, Span) {
-            let begin = self.current_word_begin;
-            let end = self.pos;
-
-            debug_assert_eq!(end as u32 as usize, end);
-            debug_assert_eq!(begin as u32 as usize, begin);
-
-            let (doc, mut span) = self.line();
-            span.hi = span.lo + BytePos(end as u32);
-            span.lo = span.lo + BytePos(begin as u32);
-
-            (&doc[begin..end], span)
-        }
+        });
+        check_doc(cx, valid_idents, parser, &spans);
     }
+}
 
-    impl<'a> Iterator for Parser<'a> {
-        type Item = (bool, char);
-
-        fn next(&mut self) -> Option<(bool, char)> {
-            while self.line < self.docs.len() {
-                if self.reset {
-                    self.line += 1;
-                    self.reset = false;
-                    self.pos = 0;
-                    self.current_word_begin = 0;
-                }
-
-                let mut chars = self.line().0[self.pos..].chars();
-                let c = chars.next();
-
-                if let Some(c) = c {
-                    self.pos += c.len_utf8();
-                    let new_line = self.new_line;
-                    self.new_line = c == '\n' || (self.new_line && c.is_whitespace());
-                    return Some((new_line, c));
-                } else if self.line == self.docs.len() - 1 {
-                    return None;
-                } else {
-                    self.new_line = true;
-                    self.reset = true;
-                    self.pos += 1;
-                    return Some((true, '\n'));
+fn check_doc<'a, Events: Iterator<Item = (usize, pulldown_cmark::Event<'a>)>>(
+    cx: &EarlyContext,
+    valid_idents: &[String],
+    docs: Events,
+    spans: &[(usize, Span)],
+) {
+    use pulldown_cmark::Event::*;
+    use pulldown_cmark::Tag::*;
+
+    let mut in_code = false;
+
+    for (offset, event) in docs {
+        match event {
+            Start(CodeBlock(_)) |
+            Start(Code) => in_code = true,
+            End(CodeBlock(_)) |
+            End(Code) => in_code = false,
+            Start(_tag) | End(_tag) => (), // We don't care about other tags
+            Html(_html) |
+            InlineHtml(_html) => (), // HTML is weird, just ignore it
+            SoftBreak => (),
+            HardBreak => (),
+            FootnoteReference(text) |
+            Text(text) => {
+                if !in_code {
+                    let index = match spans.binary_search_by(|c| c.0.cmp(&offset)) {
+                        Ok(o) => o,
+                        Err(e) => e - 1,
+                    };
+
+                    let (begin, span) = spans[index];
+
+                    // Adjust for the beginning of the current `Event`
+                    let span = Span {
+                        lo: span.lo + BytePos::from_usize(offset - begin),
+                        ..span
+                    };
+
+                    check_text(cx, valid_idents, &text, span);
                 }
-            }
-
-            None
+            },
         }
     }
+}
 
-    let mut parser = Parser {
-        current_word_begin: 0,
-        docs: docs,
-        line: 0,
-        link: false,
-        new_line: true,
-        reset: false,
-        pos: 0,
-    };
-
-    /// Check for fanced code block.
-    macro_rules! check_block {
-        ($parser:expr, $c:tt, $new_line:expr) => {{
-            check_block!($parser, $c, $c, $new_line)
-        }};
-
-        ($parser:expr, $c:pat, $c_expr:expr, $new_line:expr) => {{
-            fn check_block(parser: &mut Parser, new_line: bool) -> Result<bool, ()> {
-                if new_line {
-                    let mut lookup_parser = parser.clone();
-                    if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
-                        *parser = lookup_parser;
-                        // 3 or more ` or ~ open a code block to be closed with the same number of ` or ~
-                        let mut open_count = 3;
-                        while let Some((false, $c)) = parser.next() {
-                            open_count += 1;
-                        }
-
-                        loop {
-                            loop {
-                                if try!(parser.jump_to($c_expr)) {
-                                    break;
-                                }
-                            }
-
-                            lookup_parser = parser.clone();
-                            if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
-                                let mut close_count = 3;
-                                while let Some((false, $c)) = lookup_parser.next() {
-                                    close_count += 1;
-                                }
-
-                                if close_count == open_count {
-                                    *parser = lookup_parser;
-                                    return Ok(true);
-                                }
-                            }
-                        }
-                    }
-                }
-
-                Ok(false)
-            }
-
-            check_block(&mut $parser, $new_line)
-        }};
-    }
-
-    loop {
-        match parser.next() {
-            Some((new_line, c)) => {
-                match c {
-                    '#' if new_line => { // don’t warn on titles
-                        parser.next_line();
-                    }
-                    '`' => {
-                        if try!(check_block!(parser, '`', new_line)) {
-                            continue;
-                        }
-
-                        try!(parser.jump_to('`')); // not a code block, just inline code
-                    }
-                    '~' => {
-                        if try!(check_block!(parser, '~', new_line)) {
-                            continue;
-                        }
-
-                        // ~ does not introduce inline code, but two of them introduce
-                        // strikethrough. Too bad for the consistency but we don't care about
-                        // strikethrough.
-                    }
-                    '[' => {
-                        // Check for a reference definition `[foo]:` at the beginning of a line
-                        let mut link = true;
-
-                        if new_line {
-                            let mut lookup_parser = parser.clone();
-                            if lookup_parser.any(|(_, c)| c == ']') {
-                                if let Some((_, ':')) = lookup_parser.next() {
-                                    lookup_parser.next_line();
-                                    parser = lookup_parser;
-                                    link = false;
-                                }
-                            }
-                        }
+fn check_text(cx: &EarlyContext, valid_idents: &[String], text: &str, span: Span) {
+    for word in text.split_whitespace() {
+        // Trim punctuation as in `some comment (see foo::bar).`
+        //                                                   ^^
+        // Or even as in `_foo bar_` which is emphasized.
+        let word = word.trim_matches(|c: char| !c.is_alphanumeric());
 
-                        parser.advance_begin();
-                        parser.link = link;
-                    }
-                    ']' if parser.link => {
-                        parser.link = false;
-
-                        match parser.peek() {
-                            Some('(') => {
-                                try!(parser.jump_to(')'));
-                            }
-                            Some('[') => {
-                                try!(parser.jump_to(']'));
-                            }
-                            Some(_) => continue,
-                            None => return Err(()),
-                        }
-                    }
-                    c if !is_path_char(c) => {
-                        parser.advance_begin();
-                    }
-                    _ => {
-                        if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) {
-                            parser.put_back(c);
-                        }
+        if valid_idents.iter().any(|i| i == word) {
+            continue;
+        }
 
-                        let (word, span) = parser.word();
-                        check_word(cx, valid_idents, word, span);
-                        parser.advance_begin();
-                    }
-                }
+        // Adjust for the current word
+        let offset = word.as_ptr() as usize - text.as_ptr() as usize;
+        let span = Span {
+            lo: span.lo + BytePos::from_usize(offset),
+            hi: span.lo + BytePos::from_usize(offset + word.len()),
+            ..span
+        };
 
-            }
-            None => break,
-        }
+        check_word(cx, word, span);
     }
-
-    Ok(())
 }
 
-fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
-    /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is
-    /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
+fn check_word(cx: &EarlyContext, word: &str, span: Span) {
+    /// Checks if a string is camel-case, i.e. contains at least two uppercase
+    /// letters (`Clippy` is
+    /// ok) and one lower-case letter (`NASA` is ok). Plurals are also excluded
+    /// (`IDs` is ok).
     fn is_camel_case(s: &str) -> bool {
         if s.starts_with(|c: char| c.is_digit(10)) {
             return false;
         }
@@ -386,28 +279,20 @@ fn is_camel_case(s: &str) -> bool {
             s
         };
 
-        s.chars().all(char::is_alphanumeric) &&
-        s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
-        s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
+        s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
+            s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
     }
 
     fn has_underscore(s: &str) -> bool {
        s != "_" && !s.contains("\\_") && s.contains('_')
     }
 
-    // Trim punctuation as in `some comment (see foo::bar).`
-    //                                                   ^^
-    // Or even as in `_foo bar_` which is emphasized.
-    let word = word.trim_matches(|c: char| !c.is_alphanumeric());
-
-    if valid_idents.iter().any(|i| i == word) {
-        return;
-    }
-
     if has_underscore(word) || word.contains("::") || is_camel_case(word) {
-        span_lint(cx,
-                  DOC_MARKDOWN,
-                  span,
-                  &format!("you should put `{}` between ticks in the documentation", word));
+        span_lint(
+            cx,
+            DOC_MARKDOWN,
+            span,
+            &format!("you should put `{}` between ticks in the documentation", word),
+        );
     }
 }
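Note on the new span bookkeeping: `check_doc` maps the byte offset that pulldown_cmark reports for each event back to the doc-comment fragment it came from by binary-searching the cumulative offsets stored in `spans`. The stand-alone sketch below is not part of the diff above; a plain `u32` stands in for rustc's `BytePos`, and it assumes event offsets never precede the first fragment.

fn fragment_index(spans: &[(usize, u32)], offset: usize) -> usize {
    // `spans` pairs each fragment's start offset in the concatenated doc string
    // with its position in the original source.
    match spans.binary_search_by(|&(start, _)| start.cmp(&offset)) {
        Ok(i) => i,      // offset is exactly the start of fragment i
        Err(i) => i - 1, // otherwise it belongs to the preceding fragment
    }
}

fn main() {
    // Three fragments starting at offsets 0, 12 and 30 of the concatenated doc text.
    let spans = [(0usize, 100u32), (12, 210), (30, 330)];
    assert_eq!(fragment_index(&spans, 0), 0);
    assert_eq!(fragment_index(&spans, 15), 1);
    assert_eq!(fragment_index(&spans, 40), 2);
    println!("offset 15 maps to fragment {}", fragment_index(&spans, 15));
}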