+use itertools::Itertools;
+use pulldown_cmark;
use rustc::lint::*;
use syntax::ast;
use syntax::codemap::{Span, BytePos};
+use syntax_pos::Pos;
use utils::span_lint;
/// **What it does:** Checks for the presence of `_`, `::` or camel-case words
///
/// **Examples:**
/// ```rust
-/// /// Do something with the foo_bar parameter. See also that::other::module::foo.
+/// /// Do something with the foo_bar parameter. See also
+/// that::other::module::foo.
/// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
/// fn doit(foo_bar) { .. }
/// ```
impl Doc {
pub fn new(valid_idents: Vec<String>) -> Self {
- Doc { valid_idents: valid_idents }
+ Self { valid_idents: valid_idents }
}
}
}
}
+struct Parser<'a> {
+ parser: pulldown_cmark::Parser<'a>,
+}
+
+impl<'a> Parser<'a> {
+ fn new(parser: pulldown_cmark::Parser<'a>) -> Self {
+ Self { parser: parser }
+ }
+}
+
+impl<'a> Iterator for Parser<'a> {
+ type Item = (usize, pulldown_cmark::Event<'a>);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let offset = self.parser.get_offset();
+ self.parser.next().map(|event| (offset, event))
+ }
+}
+
/// Cleanup documentation decoration (`///` and such).
///
/// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
-/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we need to keep track of
-/// the span but this function is inspired from the later.
+/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we
+/// need to keep track of
+/// the spans, but this function is inspired by the latter.
#[allow(cast_possible_truncation)]
-pub fn strip_doc_comment_decoration((comment, span): (String, Span)) -> Vec<(String, Span)> {
+pub fn strip_doc_comment_decoration(comment: &str, span: Span) -> (String, Vec<(usize, Span)>) {
// one-line comments lose their prefix
const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
for prefix in ONELINERS {
if comment.starts_with(*prefix) {
- return vec![(comment[prefix.len()..].to_owned(),
- Span { lo: span.lo + BytePos(prefix.len() as u32), ..span })];
+ let doc = &comment[prefix.len()..];
+ let mut doc = doc.to_owned();
+ doc.push('\n');
+ return (
+ doc.to_owned(),
+ vec![
+ (doc.len(), span.with_lo(span.lo() + BytePos(prefix.len() as u32))),
+ ],
+ );
}
}
if comment.starts_with("/*") {
- return comment[3..comment.len() - 2]
- .lines()
- .map(|line| {
- let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
- debug_assert_eq!(offset as u32 as usize, offset);
-
- (line.to_owned(), Span { lo: span.lo + BytePos(offset as u32), ..span })
- })
- .collect();
+ let doc = &comment[3..comment.len() - 2];
+ let mut sizes = vec![];
+ let mut contains_initial_stars = false;
+ for line in doc.lines() {
+ let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
+ debug_assert_eq!(offset as u32 as usize, offset);
+ contains_initial_stars |= line.trim_left().starts_with('*');
+ // +1 for the newline
+ sizes.push((line.len() + 1, span.with_lo(span.lo() + BytePos(offset as u32))));
+ }
+ if !contains_initial_stars {
+ return (doc.to_string(), sizes);
+ }
+ // remove the initial '*'s if any
+ let mut no_stars = String::with_capacity(doc.len());
+ for line in doc.lines() {
+ let mut chars = line.chars();
+ while let Some(c) = chars.next() {
+ if c.is_whitespace() {
+ no_stars.push(c);
+ } else {
+ no_stars.push(if c == '*' { ' ' } else { c });
+ break;
+ }
+ }
+ no_stars.push_str(chars.as_str());
+ no_stars.push('\n');
+ }
+ return (no_stars, sizes);
}
panic!("not a doc-comment: {}", comment);
}
pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
- let mut docs = vec![];
+ let mut doc = String::new();
+ let mut spans = vec![];
for attr in attrs {
if attr.is_sugared_doc {
- if let ast::MetaItemKind::NameValue(ref doc) = attr.value.node {
- if let ast::LitKind::Str(ref doc, _) = doc.node {
- let doc = (*doc.as_str()).to_owned();
- docs.extend_from_slice(&strip_doc_comment_decoration((doc, attr.span)));
- }
+ if let Some(ref current) = attr.value_str() {
+ let current = current.to_string();
+ let (current, current_spans) = strip_doc_comment_decoration(¤t, attr.span);
+ spans.extend_from_slice(¤t_spans);
+ doc.push_str(¤t);
+ }
+ } else if let Some(name) = attr.name() {
+ // ignore mix of sugared and non-sugared doc
+ if name == "doc" {
+ return;
}
}
}
- if !docs.is_empty() {
- let _ = check_doc(cx, valid_idents, &docs);
- }
-}
-
-#[allow(while_let_loop)] // #362
-fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(String, Span)]) -> Result<(), ()> {
- // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
- // There really is no markdown specification that would disambiguate this properly. This is
- // what GitHub and Rustdoc do:
- //
- // foo_bar test_quz → foo_bar test_quz
- // foo_bar_baz → foo_bar_baz (note that the “official” spec says this should be emphasized)
- // _foo bar_ test_quz_ → <em>foo bar</em> test_quz_
- // \_foo bar\_ → _foo bar_
- // (_baz_) → (<em>baz</em>)
- // foo _ bar _ baz → foo _ bar _ baz
-
- /// Character that can appear in a path
- fn is_path_char(c: char) -> bool {
- match c {
- t if t.is_alphanumeric() => true,
- ':' | '_' => true,
- _ => false,
- }
- }
-
- #[derive(Clone, Debug)]
- /// This type is used to iterate through the documentation characters, keeping the span at the
- /// same time.
- struct Parser<'a> {
- /// First byte of the current potential match
- current_word_begin: usize,
- /// List of lines and their associated span
- docs: &'a [(String, Span)],
- /// Index of the current line we are parsing
- line: usize,
- /// Whether we are in a link
- link: bool,
- /// Whether we are at the beginning of a line
- new_line: bool,
- /// Whether we were to the end of a line last time `next` was called
- reset: bool,
- /// The position of the current character within the current line
- pos: usize,
+ let mut current = 0;
+ for &mut (ref mut offset, _) in &mut spans {
+ let offset_copy = *offset;
+ *offset = current;
+ current += offset_copy;
}
- impl<'a> Parser<'a> {
- fn advance_begin(&mut self) {
- self.current_word_begin = self.pos;
- }
-
- fn line(&self) -> (&'a str, Span) {
- let (ref doc, span) = self.docs[self.line];
- (doc, span)
- }
-
- fn peek(&self) -> Option<char> {
- self.line().0[self.pos..].chars().next()
- }
-
- #[allow(while_let_on_iterator)] // borrowck complains about for
- fn jump_to(&mut self, n: char) -> Result<bool, ()> {
- while let Some((new_line, c)) = self.next() {
- if c == n {
- self.advance_begin();
- return Ok(new_line);
- }
+ if !doc.is_empty() {
+ let parser = Parser::new(pulldown_cmark::Parser::new(&doc));
+ let parser = parser.coalesce(|x, y| {
+ use pulldown_cmark::Event::*;
+
+ let x_offset = x.0;
+ let y_offset = y.0;
+
+ match (x.1, y.1) {
+ (Text(x), Text(y)) => {
+ let mut x = x.into_owned();
+ x.push_str(&y);
+ Ok((x_offset, Text(x.into())))
+ },
+ (x, y) => Err(((x_offset, x), (y_offset, y))),
}
-
- Err(())
- }
-
- fn next_line(&mut self) {
- self.pos = 0;
- self.current_word_begin = 0;
- self.line += 1;
- self.new_line = true;
- }
-
- fn put_back(&mut self, c: char) {
- self.pos -= c.len_utf8();
- }
-
- #[allow(cast_possible_truncation)]
- fn word(&self) -> (&'a str, Span) {
- let begin = self.current_word_begin;
- let end = self.pos;
-
- debug_assert_eq!(end as u32 as usize, end);
- debug_assert_eq!(begin as u32 as usize, begin);
-
- let (doc, mut span) = self.line();
- span.hi = span.lo + BytePos(end as u32);
- span.lo = span.lo + BytePos(begin as u32);
-
- (&doc[begin..end], span)
- }
+ });
+ check_doc(cx, valid_idents, parser, &spans);
}
+}
- impl<'a> Iterator for Parser<'a> {
- type Item = (bool, char);
-
- fn next(&mut self) -> Option<(bool, char)> {
- while self.line < self.docs.len() {
- if self.reset {
- self.line += 1;
- self.reset = false;
- self.pos = 0;
- self.current_word_begin = 0;
- }
-
- let mut chars = self.line().0[self.pos..].chars();
- let c = chars.next();
-
- if let Some(c) = c {
- self.pos += c.len_utf8();
- let new_line = self.new_line;
- self.new_line = c == '\n' || (self.new_line && c.is_whitespace());
- return Some((new_line, c));
- } else if self.line == self.docs.len() - 1 {
- return None;
- } else {
- self.new_line = true;
- self.reset = true;
- self.pos += 1;
- return Some((true, '\n'));
+fn check_doc<'a, Events: Iterator<Item = (usize, pulldown_cmark::Event<'a>)>>(
+ cx: &EarlyContext,
+ valid_idents: &[String],
+ docs: Events,
+ spans: &[(usize, Span)],
+) {
+ use pulldown_cmark::Event::*;
+ use pulldown_cmark::Tag::*;
+
+ let mut in_code = false;
+
+ for (offset, event) in docs {
+ match event {
+ Start(CodeBlock(_)) |
+ Start(Code) => in_code = true,
+ End(CodeBlock(_)) |
+ End(Code) => in_code = false,
+ Start(_tag) | End(_tag) => (), // We don't care about other tags
+ Html(_html) |
+ InlineHtml(_html) => (), // HTML is weird, just ignore it
+ SoftBreak => (),
+ HardBreak => (),
+ FootnoteReference(text) |
+ Text(text) => {
+ if !in_code {
+ let index = match spans.binary_search_by(|c| c.0.cmp(&offset)) {
+ Ok(o) => o,
+ Err(e) => e - 1,
+ };
+
+ let (begin, span) = spans[index];
+
+                    // Adjust for the beginning of the current `Event`
+ let span = span.with_lo(span.lo() + BytePos::from_usize(offset - begin));
+
+ check_text(cx, valid_idents, &text, span);
}
- }
-
- None
+ },
}
}
+}
- let mut parser = Parser {
- current_word_begin: 0,
- docs: docs,
- line: 0,
- link: false,
- new_line: true,
- reset: false,
- pos: 0,
- };
-
- /// Check for fanced code block.
- macro_rules! check_block {
- ($parser:expr, $c:tt, $new_line:expr) => {{
- check_block!($parser, $c, $c, $new_line)
- }};
-
- ($parser:expr, $c:pat, $c_expr:expr, $new_line:expr) => {{
- fn check_block(parser: &mut Parser, new_line: bool) -> Result<bool, ()> {
- if new_line {
- let mut lookup_parser = parser.clone();
- if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) {
- *parser = lookup_parser;
- // 3 or more ` or ~ open a code block to be closed with the same number of ` or ~
- let mut open_count = 3;
- while let Some((false, $c)) = parser.next() {
- open_count += 1;
- }
-
- loop {
- loop {
- if try!(parser.jump_to($c_expr)) {
- break;
- }
- }
-
- lookup_parser = parser.clone();
- let a = lookup_parser.next();
- let b = lookup_parser.next();
- if let (Some((false, $c)), Some((false, $c))) = (a, b) {
- let mut close_count = 3;
- while let Some((false, $c)) = lookup_parser.next() {
- close_count += 1;
- }
-
- if close_count == open_count {
- *parser = lookup_parser;
- return Ok(true);
- }
- }
- }
- }
- }
+fn check_text(cx: &EarlyContext, valid_idents: &[String], text: &str, span: Span) {
+ for word in text.split_whitespace() {
+ // Trim punctuation as in `some comment (see foo::bar).`
+ // ^^
+ // Or even as in `_foo bar_` which is emphasized.
+ let word = word.trim_matches(|c: char| !c.is_alphanumeric());
- Ok(false)
- }
+ if valid_idents.iter().any(|i| i == word) {
+ continue;
+ }
- check_block(&mut $parser, $new_line)
- }};
- }
+ // Adjust for the current word
+ let offset = word.as_ptr() as usize - text.as_ptr() as usize;
+ let span = Span::new(
+ span.lo() + BytePos::from_usize(offset),
+ span.lo() + BytePos::from_usize(offset + word.len()),
+ span.ctxt(),
+ );
- loop {
- match parser.next() {
- Some((new_line, c)) => {
- match c {
- '#' if new_line => {
- // don’t warn on titles
- parser.next_line();
- },
- '`' => {
- if try!(check_block!(parser, '`', new_line)) {
- continue;
- }
-
- try!(parser.jump_to('`')); // not a code block, just inline code
- },
- '~' => {
- if try!(check_block!(parser, '~', new_line)) {
- continue;
- }
-
- // ~ does not introduce inline code, but two of them introduce
- // strikethrough. Too bad for the consistency but we don't care about
- // strikethrough.
- },
- '[' => {
- // Check for a reference definition `[foo]:` at the beginning of a line
- let mut link = true;
-
- if new_line {
- let mut lookup_parser = parser.clone();
- if lookup_parser.any(|(_, c)| c == ']') {
- if let Some((_, ':')) = lookup_parser.next() {
- lookup_parser.next_line();
- parser = lookup_parser;
- link = false;
- }
- }
- }
-
- parser.advance_begin();
- parser.link = link;
- },
- ']' if parser.link => {
- parser.link = false;
-
- match parser.peek() {
- Some('(') => {
- try!(parser.jump_to(')'));
- },
- Some('[') => {
- try!(parser.jump_to(']'));
- },
- Some(_) => continue,
- None => return Err(()),
- }
- },
- c if !is_path_char(c) => {
- parser.advance_begin();
- },
- _ => {
- if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) {
- parser.put_back(c);
- }
-
- let (word, span) = parser.word();
- check_word(cx, valid_idents, word, span);
- parser.advance_begin();
- },
- }
-
- },
- None => break,
- }
+ check_word(cx, word, span);
}
-
- Ok(())
}
-fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
- /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is
- /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok).
+fn check_word(cx: &EarlyContext, word: &str, span: Span) {
+    /// Checks if a string is camel-case, i.e. contains at least two uppercase
+    /// letters (`Clippy` is
+    /// ok) and one lower-case letter (`NASA` is ok). Plurals are also excluded
+    /// (`IDs` is ok).
fn is_camel_case(s: &str) -> bool {
if s.starts_with(|c: char| c.is_digit(10)) {
return false;
};
s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 &&
- s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
+ s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0
}
fn has_underscore(s: &str) -> bool {
s != "_" && !s.contains("\\_") && s.contains('_')
}
- // Trim punctuation as in `some comment (see foo::bar).`
- // ^^
- // Or even as in `_foo bar_` which is emphasized.
- let word = word.trim_matches(|c: char| !c.is_alphanumeric());
-
- if valid_idents.iter().any(|i| i == word) {
- return;
- }
-
if has_underscore(word) || word.contains("::") || is_camel_case(word) {
- span_lint(cx,
- DOC_MARKDOWN,
- span,
- &format!("you should put `{}` between ticks in the documentation", word));
+ span_lint(
+ cx,
+ DOC_MARKDOWN,
+ span,
+ &format!("you should put `{}` between ticks in the documentation", word),
+ );
}
}