+use itertools::Itertools;
+use pulldown_cmark;
use rustc::lint::*;
use syntax::ast;
use syntax::codemap::{Span, BytePos};
+use syntax_pos::Pos;
use utils::span_lint;
-/// **What it does:** This lint checks for the presence of `_`, `::` or camel-case words outside
-/// ticks in documentation.
+/// **What it does:** Checks for the presence of `_`, `::` or camel-case words
+/// outside ticks in documentation.
///
-/// **Why is this bad?** *Rustdoc* supports markdown formatting, `_`, `::` and camel-case probably
-/// indicates some code which should be included between ticks. `_` can also be used for empasis in
-/// markdown, this lint tries to consider that.
+/// **Why is this bad?** *Rustdoc* supports markdown formatting, `_`, `::` and
+/// camel-case probably indicates some code which should be included between
+/// ticks. `_` can also be used for empasis in markdown, this lint tries to
+/// consider that.
///
-/// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks for is limited.
+/// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks
+/// for is limited, and there are still false positives.
///
/// **Examples:**
/// ```rust
-/// /// Do something with the foo_bar parameter. See also that::other::module::foo.
+/// /// Do something with the foo_bar parameter. See also
+/// that::other::module::foo.
/// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
/// fn doit(foo_bar) { .. }
/// ```
declare_lint! {
- pub DOC_MARKDOWN, Warn,
- "checks for the presence of `_`, `::` or camel-case outside ticks in documentation"
+ pub DOC_MARKDOWN,
+ Warn,
+ "presence of `_`, `::` or camel-case outside backticks in documentation"
}
#[derive(Clone)]
}
}
-pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
- let mut in_multiline = false;
- for attr in attrs {
- if attr.node.is_sugared_doc {
- if let ast::MetaItemKind::NameValue(_, ref doc) = attr.node.value.node {
- if let ast::LitKind::Str(ref doc, _) = doc.node {
- // doc comments start with `///` or `//!`
- let real_doc = &doc[3..];
- let mut span = attr.span;
- span.lo = span.lo + BytePos(3);
-
- // check for multiline code blocks
- if real_doc.trim_left().starts_with("```") {
- in_multiline = !in_multiline;
- }
- if !in_multiline {
- check_doc(cx, valid_idents, real_doc, span);
- }
- }
- }
- }
+struct Parser<'a> {
+ parser: pulldown_cmark::Parser<'a>,
+}
+
+impl<'a> Parser<'a> {
+ fn new(parser: pulldown_cmark::Parser<'a>) -> Parser<'a> {
+ Self { parser: parser }
}
}
-macro_rules! jump_to {
- // Get the next character’s first byte UTF-8 friendlyly.
- (@next_char, $chars: expr, $len: expr) => {{
- if let Some(&(pos, _)) = $chars.peek() {
- pos
- } else {
- $len
+impl<'a> Iterator for Parser<'a> {
+ type Item = (usize, pulldown_cmark::Event<'a>);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let offset = self.parser.get_offset();
+ self.parser.next().map(|event| (offset, event))
+ }
+}
+
+/// Cleanup documentation decoration (`///` and such).
+///
+/// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
+/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we
+/// need to keep track of
+/// the spans but this function is inspired from the later.
+#[allow(cast_possible_truncation)]
+pub fn strip_doc_comment_decoration(comment: &str, span: Span) -> (String, Vec<(usize, Span)>) {
+ // one-line comments lose their prefix
+ const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
+ for prefix in ONELINERS {
+ if comment.starts_with(*prefix) {
+ let doc = &comment[prefix.len()..];
+ let mut doc = doc.to_owned();
+ doc.push('\n');
+ return (
+ doc.to_owned(),
+ vec![
+ (
+ doc.len(),
+ Span {
+ lo: span.lo + BytePos(prefix.len() as u32),
+ ..span
+ }
+ ),
+ ],
+ );
}
- }};
+ }
- // Jump to the next `$c`. If no such character is found, give up.
- ($chars: expr, $c: expr, $len: expr) => {{
- if $chars.find(|&(_, c)| c == $c).is_some() {
- jump_to!(@next_char, $chars, $len)
+ if comment.starts_with("/*") {
+ let doc = &comment[3..comment.len() - 2];
+ let mut sizes = vec![];
+ let mut contains_initial_stars = false;
+ for line in doc.lines() {
+ let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
+ debug_assert_eq!(offset as u32 as usize, offset);
+ contains_initial_stars |= line.trim_left().starts_with('*');
+ // +1 for the newline
+ sizes.push((
+ line.len() + 1,
+ Span {
+ lo: span.lo + BytePos(offset as u32),
+ ..span
+ },
+ ));
+ }
+ if !contains_initial_stars {
+ return (doc.to_string(), sizes);
}
- else {
- return;
+ // remove the initial '*'s if any
+ let mut no_stars = String::with_capacity(doc.len());
+ for line in doc.lines() {
+ let mut chars = line.chars();
+ while let Some(c) = chars.next() {
+ if c.is_whitespace() {
+ no_stars.push(c);
+ } else {
+ no_stars.push(if c == '*' { ' ' } else { c });
+ break;
+ }
+ }
+ no_stars.push_str(chars.as_str());
+ no_stars.push('\n');
}
- }};
+ return (no_stars, sizes);
+ }
+
+ panic!("not a doc-comment: {}", comment);
}
-#[allow(while_let_loop)] // #362
-pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], doc: &str, span: Span) {
- // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
- // There really is no markdown specification that would disambiguate this properly. This is
- // what GitHub and Rustdoc do:
- //
- // foo_bar test_quz → foo_bar test_quz
- // foo_bar_baz → foo_bar_baz (note that the “official” spec says this should be emphasized)
- // _foo bar_ test_quz_ → <em>foo bar</em> test_quz_
- // \_foo bar\_ → _foo bar_
- // (_baz_) → (<em>baz</em>)
- // foo _ bar _ baz → foo _ bar _ baz
-
- /// Character that can appear in a word
- fn is_word_char(c: char) -> bool {
- match c {
- t if t.is_alphanumeric() => true,
- ':' | '_' => true,
- _ => false,
+pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
+ let mut doc = String::new();
+ let mut spans = vec![];
+
+ for attr in attrs {
+ if attr.is_sugared_doc {
+ if let Some(ref current) = attr.value_str() {
+ let current = current.to_string();
+ let (current, current_spans) = strip_doc_comment_decoration(¤t, attr.span);
+ spans.extend_from_slice(¤t_spans);
+ doc.push_str(¤t);
+ }
+ } else if let Some(name) = attr.name() {
+ // ignore mix of sugared and non-sugared doc
+ if name == "doc" {
+ return;
+ }
}
}
- #[allow(cast_possible_truncation)]
- fn word_span(mut span: Span, begin: usize, end: usize) -> Span {
- debug_assert_eq!(end as u32 as usize, end);
- debug_assert_eq!(begin as u32 as usize, begin);
- span.hi = span.lo + BytePos(end as u32);
- span.lo = span.lo + BytePos(begin as u32);
- span
- }
-
- let mut new_line = true;
- let len = doc.len();
- let mut chars = doc.char_indices().peekable();
- let mut current_word_begin = 0;
- loop {
- match chars.next() {
- Some((_, c)) => {
- match c {
- '#' if new_line => { // don’t warn on titles
- current_word_begin = jump_to!(chars, '\n', len);
- }
- '`' => {
- current_word_begin = jump_to!(chars, '`', len);
- }
- '[' => {
- let end = jump_to!(chars, ']', len);
- let link_text = &doc[current_word_begin + 1..end];
- let word_span = word_span(span, current_word_begin + 1, end + 1);
-
- match chars.peek() {
- Some(&(_, c)) => {
- // Trying to parse a link. Let’s ignore the link.
-
- // FIXME: how does markdown handles such link?
- // https://en.wikipedia.org/w/index.php?title=)
- match c {
- '(' => { // inline link
- current_word_begin = jump_to!(chars, ')', len);
- check_doc(cx, valid_idents, link_text, word_span);
- }
- '[' => { // reference link
- current_word_begin = jump_to!(chars, ']', len);
- check_doc(cx, valid_idents, link_text, word_span);
- }
- ':' => { // reference link
- current_word_begin = jump_to!(chars, '\n', len);
- }
- _ => { // automatic reference link
- current_word_begin = jump_to!(@next_char, chars, len);
- check_doc(cx, valid_idents, link_text, word_span);
- }
- }
- }
- None => return,
- }
- }
- // anything that’s neither alphanumeric nor '_' is not part of an ident anyway
- c if !c.is_alphanumeric() && c != '_' => {
- current_word_begin = jump_to!(@next_char, chars, len);
- }
- _ => {
- let end = match chars.find(|&(_, c)| !is_word_char(c)) {
- Some((end, _)) => end,
- None => len,
- };
- let word_span = word_span(span, current_word_begin, end);
- check_word(cx, valid_idents, &doc[current_word_begin..end], word_span);
- current_word_begin = jump_to!(@next_char, chars, len);
- }
- }
+ let mut current = 0;
+ for &mut (ref mut offset, _) in &mut spans {
+ let offset_copy = *offset;
+ *offset = current;
+ current += offset_copy;
+ }
+
+ if !doc.is_empty() {
+ let parser = Parser::new(pulldown_cmark::Parser::new(&doc));
+ let parser = parser.coalesce(|x, y| {
+ use pulldown_cmark::Event::*;
- new_line = c == '\n' || (new_line && c.is_whitespace());
+ let x_offset = x.0;
+ let y_offset = y.0;
+
+ match (x.1, y.1) {
+ (Text(x), Text(y)) => {
+ let mut x = x.into_owned();
+ x.push_str(&y);
+ Ok((x_offset, Text(x.into())))
+ },
+ (x, y) => Err(((x_offset, x), (y_offset, y))),
}
- None => break,
+ });
+ check_doc(cx, valid_idents, parser, &spans);
+ }
+}
+
+fn check_doc<'a, Events: Iterator<Item = (usize, pulldown_cmark::Event<'a>)>>(
+ cx: &EarlyContext,
+ valid_idents: &[String],
+ docs: Events,
+ spans: &[(usize, Span)],
+) {
+ use pulldown_cmark::Event::*;
+ use pulldown_cmark::Tag::*;
+
+ let mut in_code = false;
+
+ for (offset, event) in docs {
+ match event {
+ Start(CodeBlock(_)) |
+ Start(Code) => in_code = true,
+ End(CodeBlock(_)) |
+ End(Code) => in_code = false,
+ Start(_tag) | End(_tag) => (), // We don't care about other tags
+ Html(_html) |
+ InlineHtml(_html) => (), // HTML is weird, just ignore it
+ SoftBreak => (),
+ HardBreak => (),
+ FootnoteReference(text) |
+ Text(text) => {
+ if !in_code {
+ let index = match spans.binary_search_by(|c| c.0.cmp(&offset)) {
+ Ok(o) => o,
+ Err(e) => e - 1,
+ };
+
+ let (begin, span) = spans[index];
+
+ // Adjust for the begining of the current `Event`
+ let span = Span {
+ lo: span.lo + BytePos::from_usize(offset - begin),
+ ..span
+ };
+
+ check_text(cx, valid_idents, &text, span);
+ }
+ },
+ }
+ }
+}
+
+fn check_text(cx: &EarlyContext, valid_idents: &[String], text: &str, span: Span) {
+ for word in text.split_whitespace() {
+ // Trim punctuation as in `some comment (see foo::bar).`
+ // ^^
+ // Or even as in `_foo bar_` which is emphasized.
+ let word = word.trim_matches(|c: char| !c.is_alphanumeric());
+
+ if valid_idents.iter().any(|i| i == word) {
+ continue;
}
+
+ // Adjust for the current word
+ let offset = word.as_ptr() as usize - text.as_ptr() as usize;
+ let span = Span {
+ lo: span.lo + BytePos::from_usize(offset),
+ hi: span.lo + BytePos::from_usize(offset + word.len()),
+ ..span
+ };
+
+ check_word(cx, word, span);
}
}
-fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
- /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is
- /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
+fn check_word(cx: &EarlyContext, word: &str, span: Span) {
+ /// Checks if a string is camel-case, ie. contains at least two uppercase
+ /// letter (`Clippy` is
+ /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded
+ /// (`IDs` is ok).
fn is_camel_case(s: &str) -> bool {
if s.starts_with(|c: char| c.is_digit(10)) {
return false;
s
};
- s.chars().all(char::is_alphanumeric) &&
- s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
- s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
+ s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
+ s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
}
fn has_underscore(s: &str) -> bool {
s != "_" && !s.contains("\\_") && s.contains('_')
}
- // Trim punctuation as in `some comment (see foo::bar).`
- // ^^
- // Or even as in `_foo bar_` which is emphasized.
- let word = word.trim_matches(|c: char| !c.is_alphanumeric());
-
- if valid_idents.iter().any(|i| i == word) {
- return;
- }
-
if has_underscore(word) || word.contains("::") || is_camel_case(word) {
- span_lint(cx,
- DOC_MARKDOWN,
- span,
- &format!("you should put `{}` between ticks in the documentation", word));
+ span_lint(
+ cx,
+ DOC_MARKDOWN,
+ span,
+ &format!("you should put `{}` between ticks in the documentation", word),
+ );
}
}