]> git.lizzy.rs Git - rust.git/blob - src/librustdoc/html/highlight.rs
Rollup merge of #82296 - spastorino:pubrules, r=nikomatsakis
[rust.git] / src / librustdoc / html / highlight.rs
//! Basic syntax highlighting functionality.
//!
//! This module uses `rustc_lexer` to provide token-based highlighting for
//! the HTML documentation generated by rustdoc.
//!
//! Use `render_with_highlighting` to highlight some Rust code.
7
8 use crate::html::escape::Escape;
9
10 use std::fmt::Display;
11 use std::iter::Peekable;
12
13 use rustc_lexer::{LiteralKind, TokenKind};
14 use rustc_span::edition::Edition;
15 use rustc_span::symbol::Symbol;
16 use rustc_span::with_default_session_globals;
17
18 use super::format::Buffer;
19
20 /// Highlights `src`, returning the HTML output.
21 crate fn render_with_highlighting(
22     src: &str,
23     out: &mut Buffer,
24     class: Option<&str>,
25     playground_button: Option<&str>,
26     tooltip: Option<(Option<Edition>, &str)>,
27     edition: Edition,
28 ) {
29     debug!("highlighting: ================\n{}\n==============", src);
30     if let Some((edition_info, class)) = tooltip {
31         write!(
32             out,
33             "<div class='information'><div class='tooltip {}'{}>ⓘ</div></div>",
34             class,
35             if let Some(edition_info) = edition_info {
36                 format!(" data-edition=\"{}\"", edition_info)
37             } else {
38                 String::new()
39             },
40         );
41     }
42
43     write_header(out, class);
44     write_code(out, &src, edition);
45     write_footer(out, playground_button);
46 }
47
48 fn write_header(out: &mut Buffer, class: Option<&str>) {
49     write!(out, "<div class=\"example-wrap\"><pre class=\"rust {}\">\n", class.unwrap_or_default());
50 }
51
52 fn write_code(out: &mut Buffer, src: &str, edition: Edition) {
53     // This replace allows to fix how the code source with DOS backline characters is displayed.
54     let src = src.replace("\r\n", "\n");
55     Classifier::new(&src, edition).highlight(&mut |highlight| {
56         match highlight {
57             Highlight::Token { text, class } => string(out, Escape(text), class),
58             Highlight::EnterSpan { class } => enter_span(out, class),
59             Highlight::ExitSpan => exit_span(out),
60         };
61     });
62 }
63
64 fn write_footer(out: &mut Buffer, playground_button: Option<&str>) {
65     write!(out, "</pre>{}</div>\n", playground_button.unwrap_or_default());
66 }
67
/// The highlighting category assigned to a piece of source text.
///
/// Variants mostly mirror token kinds; each one maps to a CSS class name.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Class {
    /// A regular (non-doc) line or block comment.
    Comment,
    /// A documentation comment.
    DocComment,
    /// The span covering an attribute.
    Attribute,
    /// A reserved keyword.
    KeyWord,
    /// Keywords that do pointer/reference stuff.
    RefKeyWord,
    /// `self` or `Self`.
    Self_,
    /// An operator.
    Op,
    /// A macro name and the `!` that follows it.
    Macro,
    /// A `$name` metavariable.
    MacroNonTerminal,
    /// A string-like literal (strings, chars, bytes).
    String,
    /// An integer or float literal.
    Number,
    /// `true` or `false`.
    Bool,
    /// Any other identifier.
    Ident,
    /// A lifetime.
    Lifetime,
    /// `Option` or `Result`.
    PreludeTy,
    /// `Some`, `None`, `Ok` or `Err`.
    PreludeVal,
    /// The `?` token.
    QuestionMark,
}
90
91 impl Class {
92     /// Returns the css class expected by rustdoc for each `Class`.
93     fn as_html(self) -> &'static str {
94         match self {
95             Class::Comment => "comment",
96             Class::DocComment => "doccomment",
97             Class::Attribute => "attribute",
98             Class::KeyWord => "kw",
99             Class::RefKeyWord => "kw-2",
100             Class::Self_ => "self",
101             Class::Op => "op",
102             Class::Macro => "macro",
103             Class::MacroNonTerminal => "macro-nonterminal",
104             Class::String => "string",
105             Class::Number => "number",
106             Class::Bool => "bool-val",
107             Class::Ident => "ident",
108             Class::Lifetime => "lifetime",
109             Class::PreludeTy => "prelude-ty",
110             Class::PreludeVal => "prelude-val",
111             Class::QuestionMark => "question-mark",
112         }
113     }
114 }
115
116 enum Highlight<'a> {
117     Token { text: &'a str, class: Option<Class> },
118     EnterSpan { class: Class },
119     ExitSpan,
120 }
121
/// Iterator state for lexing a source string one token at a time.
struct TokenIter<'a> {
    /// The part of the source that has not been tokenized yet.
    src: &'a str,
}
125
126 impl Iterator for TokenIter<'a> {
127     type Item = (TokenKind, &'a str);
128     fn next(&mut self) -> Option<(TokenKind, &'a str)> {
129         if self.src.is_empty() {
130             return None;
131         }
132         let token = rustc_lexer::first_token(self.src);
133         let (text, rest) = self.src.split_at(token.len);
134         self.src = rest;
135         Some((token.kind, text))
136     }
137 }
138
139 /// Processes program tokens, classifying strings of text by highlighting
140 /// category (`Class`).
141 struct Classifier<'a> {
142     tokens: Peekable<TokenIter<'a>>,
143     in_attribute: bool,
144     in_macro: bool,
145     in_macro_nonterminal: bool,
146     edition: Edition,
147 }
148
149 impl<'a> Classifier<'a> {
150     fn new(src: &str, edition: Edition) -> Classifier<'_> {
151         let tokens = TokenIter { src }.peekable();
152         Classifier {
153             tokens,
154             in_attribute: false,
155             in_macro: false,
156             in_macro_nonterminal: false,
157             edition,
158         }
159     }
160
161     /// Exhausts the `Classifier` writing the output into `sink`.
162     ///
163     /// The general structure for this method is to iterate over each token,
164     /// possibly giving it an HTML span with a class specifying what flavor of
165     /// token is used.
166     fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'a>)) {
167         with_default_session_globals(|| {
168             while let Some((token, text)) = self.tokens.next() {
169                 self.advance(token, text, sink);
170             }
171         })
172     }
173
174     /// Single step of highlighting. This will classify `token`, but maybe also
175     /// a couple of following ones as well.
176     fn advance(&mut self, token: TokenKind, text: &'a str, sink: &mut dyn FnMut(Highlight<'a>)) {
177         let lookahead = self.peek();
178         let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
179         let class = match token {
180             TokenKind::Whitespace => return no_highlight(sink),
181             TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
182                 if doc_style.is_some() {
183                     Class::DocComment
184                 } else {
185                     Class::Comment
186                 }
187             }
188             // Consider this as part of a macro invocation if there was a
189             // leading identifier.
190             TokenKind::Bang if self.in_macro => {
191                 self.in_macro = false;
192                 Class::Macro
193             }
194
195             // Assume that '&' or '*' is the reference or dereference operator
196             // or a reference or pointer type. Unless, of course, it looks like
197             // a logical and or a multiplication operator: `&&` or `* `.
198             TokenKind::Star => match lookahead {
199                 Some(TokenKind::Whitespace) => Class::Op,
200                 _ => Class::RefKeyWord,
201             },
202             TokenKind::And => match lookahead {
203                 Some(TokenKind::And) => {
204                     let _and = self.tokens.next();
205                     sink(Highlight::Token { text: "&&", class: Some(Class::Op) });
206                     return;
207                 }
208                 Some(TokenKind::Eq) => {
209                     let _eq = self.tokens.next();
210                     sink(Highlight::Token { text: "&=", class: Some(Class::Op) });
211                     return;
212                 }
213                 Some(TokenKind::Whitespace) => Class::Op,
214                 _ => Class::RefKeyWord,
215             },
216
217             // Operators.
218             TokenKind::Minus
219             | TokenKind::Plus
220             | TokenKind::Or
221             | TokenKind::Slash
222             | TokenKind::Caret
223             | TokenKind::Percent
224             | TokenKind::Bang
225             | TokenKind::Eq
226             | TokenKind::Lt
227             | TokenKind::Gt => Class::Op,
228
229             // Miscellaneous, no highlighting.
230             TokenKind::Dot
231             | TokenKind::Semi
232             | TokenKind::Comma
233             | TokenKind::OpenParen
234             | TokenKind::CloseParen
235             | TokenKind::OpenBrace
236             | TokenKind::CloseBrace
237             | TokenKind::OpenBracket
238             | TokenKind::At
239             | TokenKind::Tilde
240             | TokenKind::Colon
241             | TokenKind::Unknown => return no_highlight(sink),
242
243             TokenKind::Question => Class::QuestionMark,
244
245             TokenKind::Dollar => match lookahead {
246                 Some(TokenKind::Ident) => {
247                     self.in_macro_nonterminal = true;
248                     Class::MacroNonTerminal
249                 }
250                 _ => return no_highlight(sink),
251             },
252
253             // This might be the start of an attribute. We're going to want to
254             // continue highlighting it as an attribute until the ending ']' is
255             // seen, so skip out early. Down below we terminate the attribute
256             // span when we see the ']'.
257             TokenKind::Pound => {
258                 match lookahead {
259                     // Case 1: #![inner_attribute]
260                     Some(TokenKind::Bang) => {
261                         let _not = self.tokens.next().unwrap();
262                         if let Some(TokenKind::OpenBracket) = self.peek() {
263                             self.in_attribute = true;
264                             sink(Highlight::EnterSpan { class: Class::Attribute });
265                         }
266                         sink(Highlight::Token { text: "#", class: None });
267                         sink(Highlight::Token { text: "!", class: None });
268                         return;
269                     }
270                     // Case 2: #[outer_attribute]
271                     Some(TokenKind::OpenBracket) => {
272                         self.in_attribute = true;
273                         sink(Highlight::EnterSpan { class: Class::Attribute });
274                     }
275                     _ => (),
276                 }
277                 return no_highlight(sink);
278             }
279             TokenKind::CloseBracket => {
280                 if self.in_attribute {
281                     self.in_attribute = false;
282                     sink(Highlight::Token { text: "]", class: None });
283                     sink(Highlight::ExitSpan);
284                     return;
285                 }
286                 return no_highlight(sink);
287             }
288             TokenKind::Literal { kind, .. } => match kind {
289                 // Text literals.
290                 LiteralKind::Byte { .. }
291                 | LiteralKind::Char { .. }
292                 | LiteralKind::Str { .. }
293                 | LiteralKind::ByteStr { .. }
294                 | LiteralKind::RawStr { .. }
295                 | LiteralKind::RawByteStr { .. } => Class::String,
296                 // Number literals.
297                 LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
298             },
299             TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
300                 self.in_macro = true;
301                 Class::Macro
302             }
303             TokenKind::Ident => match text {
304                 "ref" | "mut" => Class::RefKeyWord,
305                 "self" | "Self" => Class::Self_,
306                 "false" | "true" => Class::Bool,
307                 "Option" | "Result" => Class::PreludeTy,
308                 "Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
309                 // Keywords are also included in the identifier set.
310                 _ if Symbol::intern(text).is_reserved(|| self.edition) => Class::KeyWord,
311                 _ if self.in_macro_nonterminal => {
312                     self.in_macro_nonterminal = false;
313                     Class::MacroNonTerminal
314                 }
315                 _ => Class::Ident,
316             },
317             TokenKind::RawIdent => Class::Ident,
318             TokenKind::Lifetime { .. } => Class::Lifetime,
319         };
320         // Anything that didn't return above is the simple case where we the
321         // class just spans a single token, so we can use the `string` method.
322         sink(Highlight::Token { text, class: Some(class) });
323     }
324
325     fn peek(&mut self) -> Option<TokenKind> {
326         self.tokens.peek().map(|(toke_kind, _text)| *toke_kind)
327     }
328 }
329
330 /// Called when we start processing a span of text that should be highlighted.
331 /// The `Class` argument specifies how it should be highlighted.
332 fn enter_span(out: &mut Buffer, klass: Class) {
333     write!(out, "<span class=\"{}\">", klass.as_html());
334 }
335
336 /// Called at the end of a span of highlighted text.
337 fn exit_span(out: &mut Buffer) {
338     out.write_str("</span>");
339 }
340
341 /// Called for a span of text. If the text should be highlighted differently
342 /// from the surrounding text, then the `Class` argument will be a value other
343 /// than `None`.
344 ///
345 /// The following sequences of callbacks are equivalent:
346 /// ```plain
347 ///     enter_span(Foo), string("text", None), exit_span()
348 ///     string("text", Foo)
349 /// ```
350 /// The latter can be thought of as a shorthand for the former, which is more
351 /// flexible.
352 fn string<T: Display>(out: &mut Buffer, text: T, klass: Option<Class>) {
353     match klass {
354         None => write!(out, "{}", text),
355         Some(klass) => write!(out, "<span class=\"{}\">{}</span>", klass.as_html(), text),
356     }
357 }
358
359 #[cfg(test)]
360 mod tests;