From e6027a42e109fef10f4fc27ebede50d1b3d203f0 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Wed, 23 Sep 2020 20:25:56 +0200 Subject: [PATCH] Add `unclosed_html_tags` lint --- compiler/rustc_lint/src/lib.rs | 7 +- compiler/rustc_session/src/lint/builtin.rs | 11 ++ src/librustdoc/core.rs | 2 + src/librustdoc/html/markdown.rs | 2 +- src/librustdoc/passes/html_tags.rs | 135 +++++++++++++++++++ src/librustdoc/passes/mod.rs | 5 + src/test/rustdoc-ui/intra-link-errors.rs | 1 + src/test/rustdoc-ui/intra-link-errors.stderr | 59 +++++++- 8 files changed, 213 insertions(+), 9 deletions(-) create mode 100644 src/librustdoc/passes/html_tags.rs diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs index 33caedfc198..591be417161 100644 --- a/compiler/rustc_lint/src/lib.rs +++ b/compiler/rustc_lint/src/lib.rs @@ -63,8 +63,8 @@ use rustc_middle::ty::TyCtxt; use rustc_session::lint::builtin::{ BARE_TRAIT_OBJECTS, BROKEN_INTRA_DOC_LINKS, ELIDED_LIFETIMES_IN_PATHS, - EXPLICIT_OUTLIVES_REQUIREMENTS, INVALID_CODEBLOCK_ATTRIBUTES, MISSING_DOC_CODE_EXAMPLES, - PRIVATE_DOC_TESTS, + EXPLICIT_OUTLIVES_REQUIREMENTS, INVALID_CODEBLOCK_ATTRIBUTES, INVALID_HTML_TAGS, + MISSING_DOC_CODE_EXAMPLES, PRIVATE_DOC_TESTS, }; use rustc_span::symbol::{Ident, Symbol}; use rustc_span::Span; @@ -308,7 +308,8 @@ macro_rules! register_passes { PRIVATE_INTRA_DOC_LINKS, INVALID_CODEBLOCK_ATTRIBUTES, MISSING_DOC_CODE_EXAMPLES, - PRIVATE_DOC_TESTS + PRIVATE_DOC_TESTS, + INVALID_HTML_TAGS ); // Register renamed and removed lints. diff --git a/compiler/rustc_session/src/lint/builtin.rs b/compiler/rustc_session/src/lint/builtin.rs index 0cc97fb4541..ce2a14b4481 100644 --- a/compiler/rustc_session/src/lint/builtin.rs +++ b/compiler/rustc_session/src/lint/builtin.rs @@ -1881,6 +1881,16 @@ "detects code samples in docs of private items not documented by rustdoc" } +declare_lint! { + /// The `invalid_html_tags` lint detects invalid HTML tags. This is a + /// `rustdoc` only lint, see the documentation in the [rustdoc book]. + /// + /// [rustdoc book]: ../../../rustdoc/lints.html#invalid_html_tags + pub INVALID_HTML_TAGS, + Warn, + "detects invalid HTML tags in doc comments" +} + declare_lint! { /// The `where_clauses_object_safety` lint detects for [object safety] of /// [where clauses]. @@ -2699,6 +2709,7 @@ INVALID_CODEBLOCK_ATTRIBUTES, MISSING_CRATE_LEVEL_DOCS, MISSING_DOC_CODE_EXAMPLES, + INVALID_HTML_TAGS, PRIVATE_DOC_TESTS, WHERE_CLAUSES_OBJECT_SAFETY, PROC_MACRO_DERIVE_RESOLUTION_FALLBACK, diff --git a/src/librustdoc/core.rs b/src/librustdoc/core.rs index 391859050e8..45a84c4fb30 100644 --- a/src/librustdoc/core.rs +++ b/src/librustdoc/core.rs @@ -328,6 +328,7 @@ pub fn run_core( let private_doc_tests = rustc_lint::builtin::PRIVATE_DOC_TESTS.name; let no_crate_level_docs = rustc_lint::builtin::MISSING_CRATE_LEVEL_DOCS.name; let invalid_codeblock_attributes_name = rustc_lint::builtin::INVALID_CODEBLOCK_ATTRIBUTES.name; + let invalid_html_tags = rustc_lint::builtin::INVALID_HTML_TAGS.name; let renamed_and_removed_lints = rustc_lint::builtin::RENAMED_AND_REMOVED_LINTS.name; let unknown_lints = rustc_lint::builtin::UNKNOWN_LINTS.name; @@ -340,6 +341,7 @@ pub fn run_core( private_doc_tests.to_owned(), no_crate_level_docs.to_owned(), invalid_codeblock_attributes_name.to_owned(), + invalid_html_tags.to_owned(), renamed_and_removed_lints.to_owned(), unknown_lints.to_owned(), ]; diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 6c0f1c02ac6..2fd06d7e573 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -43,7 +43,7 @@ #[cfg(test)] mod tests; -fn opts() -> Options { +pub(crate) fn opts() -> Options { Options::ENABLE_TABLES | Options::ENABLE_FOOTNOTES | Options::ENABLE_STRIKETHROUGH } diff --git a/src/librustdoc/passes/html_tags.rs b/src/librustdoc/passes/html_tags.rs new file mode 100644 index 00000000000..b177eaeeb73 --- /dev/null +++ b/src/librustdoc/passes/html_tags.rs @@ -0,0 +1,135 @@ +use super::{span_of_attrs, Pass}; +use crate::clean::*; +use crate::core::DocContext; +use crate::fold::DocFolder; +use crate::html::markdown::opts; +use pulldown_cmark::{Event, Parser}; +use rustc_hir::hir_id::HirId; +use rustc_session::lint; +use rustc_span::Span; + +pub const CHECK_INVALID_HTML_TAGS: Pass = Pass { + name: "check-invalid-html-tags", + run: check_invalid_html_tags, + description: "detects invalid HTML tags in doc comments", +}; + +struct InvalidHtmlTagsLinter<'a, 'tcx> { + cx: &'a DocContext<'tcx>, +} + +impl<'a, 'tcx> InvalidHtmlTagsLinter<'a, 'tcx> { + fn new(cx: &'a DocContext<'tcx>) -> Self { + InvalidHtmlTagsLinter { cx } + } +} + +pub fn check_invalid_html_tags(krate: Crate, cx: &DocContext<'_>) -> Crate { + let mut coll = InvalidHtmlTagsLinter::new(cx); + + coll.fold_crate(krate) +} + +const ALLOWED_UNCLOSED: &[&str] = &[ + "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", + "source", "track", "wbr", +]; + +fn drop_tag( + cx: &DocContext<'_>, + tags: &mut Vec, + tag_name: String, + hir_id: HirId, + sp: Span, +) { + if let Some(pos) = tags.iter().position(|t| *t == tag_name) { + for _ in pos + 1..tags.len() { + if ALLOWED_UNCLOSED.iter().find(|&at| at == &tags[pos + 1]).is_some() { + continue; + } + // `tags` is used as a queue, meaning that everything after `pos` is included inside it. + // So `

` will look like `["h2", "h3"]`. So when closing `h2`, we will still + // have `h3`, meaning the tag wasn't closed as it should have. + cx.tcx.struct_span_lint_hir(lint::builtin::INVALID_HTML_TAGS, hir_id, sp, |lint| { + lint.build(&format!("unclosed HTML tag `{}`", tags[pos + 1])).emit() + }); + tags.remove(pos + 1); + } + tags.remove(pos); + } else { + // It can happen for example in this case: `

` (the `h2` tag isn't required + // but it helps for the visualization). + cx.tcx.struct_span_lint_hir(lint::builtin::INVALID_HTML_TAGS, hir_id, sp, |lint| { + lint.build(&format!("unopened HTML tag `{}`", tag_name)).emit() + }); + } +} + +fn extract_tag(cx: &DocContext<'_>, tags: &mut Vec, text: &str, hir_id: HirId, sp: Span) { + let mut iter = text.chars().peekable(); + + while let Some(c) = iter.next() { + if c == '<' { + let mut tag_name = String::new(); + let mut is_closing = false; + while let Some(&c) = iter.peek() { + // + if c == '/' && tag_name.is_empty() { + is_closing = true; + } else if c.is_ascii_alphanumeric() && !c.is_ascii_uppercase() { + tag_name.push(c); + } else { + break; + } + iter.next(); + } + if tag_name.is_empty() { + // Not an HTML tag presumably... + continue; + } + if is_closing { + drop_tag(cx, tags, tag_name, hir_id, sp); + } else { + tags.push(tag_name); + } + } + } +} + +impl<'a, 'tcx> DocFolder for InvalidHtmlTagsLinter<'a, 'tcx> { + fn fold_item(&mut self, item: Item) -> Option { + let hir_id = match self.cx.as_local_hir_id(item.def_id) { + Some(hir_id) => hir_id, + None => { + // If non-local, no need to check anything. + return None; + } + }; + let dox = item.attrs.collapsed_doc_value().unwrap_or_default(); + if !dox.is_empty() { + let sp = span_of_attrs(&item.attrs).unwrap_or(item.source.span()); + let mut tags = Vec::new(); + + let p = Parser::new_ext(&dox, opts()); + + for event in p { + match event { + Event::Html(text) => extract_tag(self.cx, &mut tags, &text, hir_id, sp), + _ => {} + } + } + + for tag in tags.iter().filter(|t| ALLOWED_UNCLOSED.iter().find(|at| at == t).is_none()) + { + self.cx.tcx.struct_span_lint_hir( + lint::builtin::INVALID_HTML_TAGS, + hir_id, + sp, + |lint| lint.build(&format!("unclosed HTML tag `{}`", tag)).emit(), + ); + } + } + + self.fold_item_recur(item) + } +} diff --git a/src/librustdoc/passes/mod.rs b/src/librustdoc/passes/mod.rs index 75a65966667..3819aaee560 100644 --- a/src/librustdoc/passes/mod.rs +++ b/src/librustdoc/passes/mod.rs @@ -45,6 +45,9 @@ mod calculate_doc_coverage; pub use self::calculate_doc_coverage::CALCULATE_DOC_COVERAGE; +mod html_tags; +pub use self::html_tags::CHECK_INVALID_HTML_TAGS; + /// A single pass over the cleaned documentation. /// /// Runs in the compiler context, so it has access to types and traits and the like. @@ -87,6 +90,7 @@ pub enum Condition { CHECK_CODE_BLOCK_SYNTAX, COLLECT_TRAIT_IMPLS, CALCULATE_DOC_COVERAGE, + CHECK_INVALID_HTML_TAGS, ]; /// The list of passes run by default. @@ -101,6 +105,7 @@ pub enum Condition { ConditionalPass::always(COLLECT_INTRA_DOC_LINKS), ConditionalPass::always(CHECK_CODE_BLOCK_SYNTAX), ConditionalPass::always(PROPAGATE_DOC_CFG), + ConditionalPass::always(CHECK_INVALID_HTML_TAGS), ]; /// The list of default passes run when `--doc-coverage` is passed to rustdoc. diff --git a/src/test/rustdoc-ui/intra-link-errors.rs b/src/test/rustdoc-ui/intra-link-errors.rs index 0278caf3087..bd4db6ad617 100644 --- a/src/test/rustdoc-ui/intra-link-errors.rs +++ b/src/test/rustdoc-ui/intra-link-errors.rs @@ -1,3 +1,4 @@ +#![allow(unclosed_html_tags)] #![deny(broken_intra_doc_links)] //~^ NOTE lint level is defined diff --git a/src/test/rustdoc-ui/intra-link-errors.stderr b/src/test/rustdoc-ui/intra-link-errors.stderr index b63f799535a..13ed978d613 100644 --- a/src/test/rustdoc-ui/intra-link-errors.stderr +++ b/src/test/rustdoc-ui/intra-link-errors.stderr @@ -1,35 +1,40 @@ error: unresolved link to `path::to::nonexistent::module` - --> $DIR/intra-link-errors.rs:7:6 + --> $DIR/intra-link-errors.rs:8:6 | LL | /// [path::to::nonexistent::module] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ the module `intra_link_errors` contains no item named `path` | note: the lint level is defined here - --> $DIR/intra-link-errors.rs:1:9 + --> $DIR/intra-link-errors.rs:2:9 | LL | #![deny(broken_intra_doc_links)] | ^^^^^^^^^^^^^^^^^^^^^^ error: unresolved link to `path::to::nonexistent::macro` - --> $DIR/intra-link-errors.rs:11:6 + --> $DIR/intra-link-errors.rs:12:6 | LL | /// [path::to::nonexistent::macro!] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ the module `intra_link_errors` contains no item named `path` error: unresolved link to `path::to::nonexistent::type` - --> $DIR/intra-link-errors.rs:15:6 + --> $DIR/intra-link-errors.rs:16:6 | LL | /// [type@path::to::nonexistent::type] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ the module `intra_link_errors` contains no item named `path` error: unresolved link to `std::io::not::here` - --> $DIR/intra-link-errors.rs:19:6 + --> $DIR/intra-link-errors.rs:20:6 | LL | /// [std::io::not::here] | ^^^^^^^^^^^^^^^^^^ the module `io` contains no item named `not` +<<<<<<< HEAD error: unresolved link to `std::io::not::here` --> $DIR/intra-link-errors.rs:23:6 +======= +error: unresolved link to `std::io::Error::x` + --> $DIR/intra-link-errors.rs:24:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [type@std::io::not::here] | ^^^^^^^^^^^^^^^^^^^^^^^ the module `io` contains no item named `not` @@ -41,13 +46,21 @@ LL | /// [std::io::Error::x] | ^^^^^^^^^^^^^^^^^ the struct `Error` has no field or associated item named `x` error: unresolved link to `std::io::ErrorKind::x` +<<<<<<< HEAD --> $DIR/intra-link-errors.rs:31:6 +======= + --> $DIR/intra-link-errors.rs:28:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [std::io::ErrorKind::x] | ^^^^^^^^^^^^^^^^^^^^^ the enum `ErrorKind` has no variant or associated item named `x` error: unresolved link to `f::A` +<<<<<<< HEAD --> $DIR/intra-link-errors.rs:35:6 +======= + --> $DIR/intra-link-errors.rs:32:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [f::A] | ^^^^ `f` is a function, not a module or type, and cannot have associated items @@ -59,25 +72,41 @@ LL | /// [f::A!] | ^^^^^ `f` is a function, not a module or type, and cannot have associated items error: unresolved link to `S::A` +<<<<<<< HEAD --> $DIR/intra-link-errors.rs:43:6 +======= + --> $DIR/intra-link-errors.rs:36:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [S::A] | ^^^^ the struct `S` has no field or associated item named `A` error: unresolved link to `S::fmt` +<<<<<<< HEAD --> $DIR/intra-link-errors.rs:47:6 +======= + --> $DIR/intra-link-errors.rs:40:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [S::fmt] | ^^^^^^ the struct `S` has no field or associated item named `fmt` error: unresolved link to `E::D` +<<<<<<< HEAD --> $DIR/intra-link-errors.rs:51:6 +======= + --> $DIR/intra-link-errors.rs:44:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [E::D] | ^^^^ the enum `E` has no variant or associated item named `D` error: unresolved link to `u8::not_found` +<<<<<<< HEAD --> $DIR/intra-link-errors.rs:55:6 +======= + --> $DIR/intra-link-errors.rs:48:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [u8::not_found] | ^^^^^^^^^^^^^ the builtin type `u8` has no associated item named `not_found` @@ -98,7 +127,11 @@ LL | /// [type@Vec::into_iter] | help: to link to the associated function, add parentheses: `Vec::into_iter()` error: unresolved link to `S` +<<<<<<< HEAD --> $DIR/intra-link-errors.rs:68:6 +======= + --> $DIR/intra-link-errors.rs:52:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [S!] | ^^ @@ -107,7 +140,11 @@ LL | /// [S!] | help: to link to the struct, prefix with `struct@`: `struct@S` error: unresolved link to `T::g` +<<<<<<< HEAD --> $DIR/intra-link-errors.rs:86:6 +======= + --> $DIR/intra-link-errors.rs:70:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [type@T::g] | ^^^^^^^^^ @@ -116,13 +153,21 @@ LL | /// [type@T::g] | help: to link to the associated function, add parentheses: `T::g()` error: unresolved link to `T::h` +<<<<<<< HEAD --> $DIR/intra-link-errors.rs:91:6 +======= + --> $DIR/intra-link-errors.rs:75:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [T::h!] | ^^^^^ the trait `T` has no macro named `h` error: unresolved link to `S::h` +<<<<<<< HEAD --> $DIR/intra-link-errors.rs:78:6 +======= + --> $DIR/intra-link-errors.rs:62:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [type@S::h] | ^^^^^^^^^ @@ -131,7 +176,11 @@ LL | /// [type@S::h] | help: to link to the associated function, add parentheses: `S::h()` error: unresolved link to `m` +<<<<<<< HEAD --> $DIR/intra-link-errors.rs:98:6 +======= + --> $DIR/intra-link-errors.rs:82:6 +>>>>>>> Add `unclosed_html_tags` lint | LL | /// [m()] | ^^^ -- 2.44.0