src/librustdoc/passes/html_tags.rs

   1 use super::{span_of_attrs, Pass};
   2 use crate::clean::*;
   3 use crate::core::DocContext;
   4 use crate::fold::DocFolder;
   5 use crate::html::markdown::opts;
   6 use core::ops::Range;
   7 use pulldown_cmark::{Event, Parser};
   8 use rustc_feature::UnstableFeatures;
   9 use rustc_session::lint;
  10
  11 pub const CHECK_INVALID_HTML_TAGS: Pass = Pass {
  12     name: "check-invalid-html-tags",
  13     run: check_invalid_html_tags,
  14     description: "detects invalid HTML tags in doc comments",
  15 };
  16
  17 struct InvalidHtmlTagsLinter<'a, 'tcx> {
  18     cx: &'a DocContext<'tcx>,
  19 }
  20
  21 impl<'a, 'tcx> InvalidHtmlTagsLinter<'a, 'tcx> {
  22     fn new(cx: &'a DocContext<'tcx>) -> Self {
  23         InvalidHtmlTagsLinter { cx }
  24     }
  25 }
  26
  27 pub fn check_invalid_html_tags(krate: Crate, cx: &DocContext<'_>) -> Crate {
  28     if !UnstableFeatures::from_environment().is_nightly_build() {
  29         krate
  30     } else {
  31         let mut coll = InvalidHtmlTagsLinter::new(cx);
  32
  33         coll.fold_crate(krate)
  34     }
  35 }
  36
  37 const ALLOWED_UNCLOSED: &[&str] = &[
  38     "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param",
  39     "source", "track", "wbr",
  40 ];
  41
  42 fn drop_tag(
  43     tags: &mut Vec<(String, Range<usize>)>,
  44     tag_name: String,
  45     range: Range<usize>,
  46     f: &impl Fn(&str, &Range<usize>),
  47 ) {
  48     let tag_name_low = tag_name.to_lowercase();
  49     if let Some(pos) = tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) {
  50         // If the tag is nested inside a "<script>" or a "<style>" tag, no warning should
  51         // be emitted.
  52         let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| {
  53             let at = at.to_lowercase();
  54             at == "script" || at == "style"
  55         });
  56         for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) {
  57             if should_not_warn {
  58                 continue;
  59             }
  60             let last_tag_name_low = last_tag_name.to_lowercase();
  61             if ALLOWED_UNCLOSED.iter().any(|&at| at == &last_tag_name_low) {
  62                 continue;
  63             }
  64             // `tags` is used as a queue, meaning that everything after `pos` is included inside it.
  65             // So `<h2><h3></h2>` will look like `["h2", "h3"]`. So when closing `h2`, we will still
  66             // have `h3`, meaning the tag wasn't closed as it should have.
  67             f(&format!("unclosed HTML tag `{}`", last_tag_name), &last_tag_span);
  68         }
  69         // Remove the `tag_name` that was originally closed
  70         tags.pop();
  71     } else {
  72         // It can happen for example in this case: `<h2></script></h2>` (the `h2` tag isn't required
  73         // but it helps for the visualization).
  74         f(&format!("unopened HTML tag `{}`", tag_name), &range);
  75     }
  76 }
  77
  78 fn extract_tag(
  79     tags: &mut Vec<(String, Range<usize>)>,
  80     text: &str,
  81     range: Range<usize>,
  82     f: &impl Fn(&str, &Range<usize>),
  83 ) {
  84     let mut iter = text.char_indices().peekable();
  85
  86     while let Some((start_pos, c)) = iter.next() {
  87         if c == '<' {
  88             let mut tag_name = String::new();
  89             let mut is_closing = false;
  90             let mut prev_pos = start_pos;
  91             loop {
  92                 let (pos, c) = match iter.peek() {
  93                     Some((pos, c)) => (*pos, *c),
  94                     // In case we reached the of the doc comment, we want to check that it's an
  95                     // unclosed HTML tag. For example "/// <h3".
  96                     None => (prev_pos, '\0'),
  97                 };
  98                 prev_pos = pos;
  99                 // Checking if this is a closing tag (like `</a>` for `<a>`).
 100                 if c == '/' && tag_name.is_empty() {
 101                     is_closing = true;
 102                 } else if c.is_ascii_alphanumeric() {
 103                     tag_name.push(c);
 104                 } else {
 105                     if !tag_name.is_empty() {
 106                         let mut r =
 107                             Range { start: range.start + start_pos, end: range.start + pos };
 108                         if c == '>' {
 109                             // In case we have a tag without attribute, we can consider the span to
 110                             // refer to it fully.
 111                             r.end += 1;
 112                         }
 113                         if is_closing {
 114                             // In case we have "</div >" or even "</div         >".
 115                             if c != '>' {
 116                                 if !c.is_whitespace() {
 117                                     // It seems like it's not a valid HTML tag.
 118                                     break;
 119                                 }
 120                                 let mut found = false;
 121                                 for (new_pos, c) in text[pos..].char_indices() {
 122                                     if !c.is_whitespace() {
 123                                         if c == '>' {
 124                                             r.end = range.start + new_pos + 1;
 125                                             found = true;
 126                                         }
 127                                         break;
 128                                     }
 129                                 }
 130                                 if !found {
 131                                     break;
 132                                 }
 133                             }
 134                             drop_tag(tags, tag_name, r, f);
 135                         } else {
 136                             tags.push((tag_name, r));
 137                         }
 138                     }
 139                     break;
 140                 }
 141                 iter.next();
 142             }
 143         }
 144     }
 145 }
 146
 147 impl<'a, 'tcx> DocFolder for InvalidHtmlTagsLinter<'a, 'tcx> {
 148     fn fold_item(&mut self, item: Item) -> Option<Item> {
 149         let hir_id = match self.cx.as_local_hir_id(item.def_id) {
 150             Some(hir_id) => hir_id,
 151             None => {
 152                 // If non-local, no need to check anything.
 153                 return self.fold_item_recur(item);
 154             }
 155         };
 156         let dox = item.attrs.collapsed_doc_value().unwrap_or_default();
 157         if !dox.is_empty() {
 158             let cx = &self.cx;
 159             let report_diag = |msg: &str, range: &Range<usize>| {
 160                 let sp = match super::source_span_for_markdown_range(cx, &dox, range, &item.attrs) {
 161                     Some(sp) => sp,
 162                     None => span_of_attrs(&item.attrs).unwrap_or(item.source.span()),
 163                 };
 164                 cx.tcx.struct_span_lint_hir(lint::builtin::INVALID_HTML_TAGS, hir_id, sp, |lint| {
 165                     lint.build(msg).emit()
 166                 });
 167             };
 168
 169             let mut tags = Vec::new();
 170
 171             let p = Parser::new_ext(&dox, opts()).into_offset_iter();
 172
 173             for (event, range) in p {
 174                 match event {
 175                     Event::Html(text) => extract_tag(&mut tags, &text, range, &report_diag),
 176                     _ => {}
 177                 }
 178             }
 179
 180             for (tag, range) in tags.iter().filter(|(t, _)| {
 181                 let t = t.to_lowercase();
 182                 ALLOWED_UNCLOSED.iter().find(|&&at| at == t).is_none()
 183             }) {
 184                 report_diag(&format!("unclosed HTML tag `{}`", tag), range);
 185             }
 186         }
 187
 188         self.fold_item_recur(item)
 189     }
 190 }