]> git.lizzy.rs Git - rust.git/blob - src/librustdoc/passes/bare_urls.rs
Auto merge of #102596 - scottmcm:option-bool-calloc, r=Mark-Simulacrum
[rust.git] / src / librustdoc / passes / bare_urls.rs
1 //! Detects links that are not linkified, e.g., in Markdown such as `Go to https://example.com/.`
2 //! Suggests wrapping the link with angle brackets: `Go to <https://example.com/>.` to linkify it.
3 use super::Pass;
4 use crate::clean::*;
5 use crate::core::DocContext;
6 use crate::html::markdown::main_body_opts;
7 use crate::visit::DocVisitor;
8 use core::ops::Range;
9 use pulldown_cmark::{Event, Parser, Tag};
10 use regex::Regex;
11 use rustc_errors::Applicability;
12 use std::mem;
13 use std::sync::LazyLock;
14
15 pub(crate) const CHECK_BARE_URLS: Pass = Pass {
16     name: "check-bare-urls",
17     run: check_bare_urls,
18     description: "detects URLs that are not hyperlinks",
19 };
20
21 static URL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
22     Regex::new(concat!(
23         r"https?://",                          // url scheme
24         r"([-a-zA-Z0-9@:%._\+~#=]{2,256}\.)+", // one or more subdomains
25         r"[a-zA-Z]{2,63}",                     // root domain
26         r"\b([-a-zA-Z0-9@:%_\+.~#?&/=]*)"      // optional query or url fragments
27     ))
28     .expect("failed to build regex")
29 });
30
31 struct BareUrlsLinter<'a, 'tcx> {
32     cx: &'a mut DocContext<'tcx>,
33 }
34
35 impl<'a, 'tcx> BareUrlsLinter<'a, 'tcx> {
36     fn find_raw_urls(
37         &self,
38         text: &str,
39         range: Range<usize>,
40         f: &impl Fn(&DocContext<'_>, &str, &str, Range<usize>),
41     ) {
42         trace!("looking for raw urls in {}", text);
43         // For now, we only check "full" URLs (meaning, starting with "http://" or "https://").
44         for match_ in URL_REGEX.find_iter(text) {
45             let url = match_.as_str();
46             let url_range = match_.range();
47             f(
48                 self.cx,
49                 "this URL is not a hyperlink",
50                 url,
51                 Range { start: range.start + url_range.start, end: range.start + url_range.end },
52             );
53         }
54     }
55 }
56
57 pub(crate) fn check_bare_urls(krate: Crate, cx: &mut DocContext<'_>) -> Crate {
58     BareUrlsLinter { cx }.visit_crate(&krate);
59     krate
60 }
61
62 impl<'a, 'tcx> DocVisitor for BareUrlsLinter<'a, 'tcx> {
63     fn visit_item(&mut self, item: &Item) {
64         let Some(hir_id) = DocContext::as_local_hir_id(self.cx.tcx, item.item_id)
65         else {
66             // If non-local, no need to check anything.
67             return;
68         };
69         let dox = item.attrs.collapsed_doc_value().unwrap_or_default();
70         if !dox.is_empty() {
71             let report_diag = |cx: &DocContext<'_>, msg: &str, url: &str, range: Range<usize>| {
72                 let sp = super::source_span_for_markdown_range(cx.tcx, &dox, &range, &item.attrs)
73                     .unwrap_or_else(|| item.attr_span(cx.tcx));
74                 cx.tcx.struct_span_lint_hir(crate::lint::BARE_URLS, hir_id, sp, msg, |lint| {
75                     lint.note("bare URLs are not automatically turned into clickable links")
76                         .span_suggestion(
77                             sp,
78                             "use an automatic link instead",
79                             format!("<{}>", url),
80                             Applicability::MachineApplicable,
81                         )
82                 });
83             };
84
85             let mut p = Parser::new_ext(&dox, main_body_opts()).into_offset_iter();
86
87             while let Some((event, range)) = p.next() {
88                 match event {
89                     Event::Text(s) => self.find_raw_urls(&s, range, &report_diag),
90                     // We don't want to check the text inside code blocks or links.
91                     Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link(..))) => {
92                         while let Some((event, _)) = p.next() {
93                             match event {
94                                 Event::End(end)
95                                     if mem::discriminant(&end) == mem::discriminant(&tag) =>
96                                 {
97                                     break;
98                                 }
99                                 _ => {}
100                             }
101                         }
102                     }
103                     _ => {}
104                 }
105             }
106         }
107
108         self.visit_item_recur(item)
109     }
110 }