1 //! Basic syntax highlighting functionality.
3 //! This module uses the `rustc_lexer` crate to provide token-based highlighting for
4 //! the HTML documentation generated by rustdoc.
6 //! Use `render_with_highlighting` to highlight some Rust code.
8 use crate::html::escape::Escape;
10 use std::fmt::Display;
11 use std::iter::Peekable;
13 use rustc_lexer::{LiteralKind, TokenKind};
14 use rustc_span::edition::Edition;
15 use rustc_span::symbol::Symbol;
17 use super::format::Buffer;
19 /// Highlights `src`, writing the resulting HTML into `out`.
20 crate fn render_with_highlighting(
24 playground_button: Option<&str>,
25 tooltip: Option<(Option<Edition>, &str)>,
27 extra_content: Option<Buffer>,
29 debug!("highlighting: ================\n{}\n==============", src);
30 if let Some((edition_info, class)) = tooltip {
33 "<div class='information'><div class='tooltip {}'{}>ⓘ</div></div>",
35 if let Some(edition_info) = edition_info {
36 format!(" data-edition=\"{}\"", edition_info)
43 write_header(out, class, extra_content);
44 write_code(out, &src, edition);
45 write_footer(out, playground_button);
48 fn write_header(out: &mut Buffer, class: Option<&str>, extra_content: Option<Buffer>) {
49 write!(out, "<div class=\"example-wrap\">");
50 if let Some(extra) = extra_content {
51 out.push_buffer(extra);
53 if let Some(class) = class {
54 writeln!(out, "<pre class=\"rust {}\">", class);
56 writeln!(out, "<pre class=\"rust\">");
60 fn write_code(out: &mut Buffer, src: &str, edition: Edition) {
61 // Normalize DOS line endings (`\r\n`) to `\n` so that such source code is displayed correctly.
62 let src = src.replace("\r\n", "\n");
63 Classifier::new(&src, edition).highlight(&mut |highlight| {
65 Highlight::Token { text, class } => string(out, Escape(text), class),
66 Highlight::EnterSpan { class } => enter_span(out, class),
67 Highlight::ExitSpan => exit_span(out),
72 fn write_footer(out: &mut Buffer, playground_button: Option<&str>) {
73 writeln!(out, "</pre>{}</div>", playground_button.unwrap_or_default());
76 /// How a span of text is classified. Mostly corresponds to token kinds.
77 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
83 // Keywords that do pointer/reference stuff.
100 /// Returns the css class expected by rustdoc for each `Class`.
101 fn as_html(self) -> &'static str {
103 Class::Comment => "comment",
104 Class::DocComment => "doccomment",
105 Class::Attribute => "attribute",
106 Class::KeyWord => "kw",
107 Class::RefKeyWord => "kw-2",
108 Class::Self_ => "self",
110 Class::Macro => "macro",
111 Class::MacroNonTerminal => "macro-nonterminal",
112 Class::String => "string",
113 Class::Number => "number",
114 Class::Bool => "bool-val",
115 Class::Ident => "ident",
116 Class::Lifetime => "lifetime",
117 Class::PreludeTy => "prelude-ty",
118 Class::PreludeVal => "prelude-val",
119 Class::QuestionMark => "question-mark",
125 Token { text: &'a str, class: Option<Class> },
126 EnterSpan { class: Class },
130 struct TokenIter<'a> {
134 impl Iterator for TokenIter<'a> {
135 type Item = (TokenKind, &'a str);
136 fn next(&mut self) -> Option<(TokenKind, &'a str)> {
137 if self.src.is_empty() {
140 let token = rustc_lexer::first_token(self.src);
141 let (text, rest) = self.src.split_at(token.len);
143 Some((token.kind, text))
147 fn get_real_ident_class(text: &str, edition: Edition) -> Class {
149 "ref" | "mut" => Class::RefKeyWord,
150 "self" | "Self" => Class::Self_,
151 "false" | "true" => Class::Bool,
152 _ if Symbol::intern(text).is_reserved(|| edition) => Class::KeyWord,
157 /// Processes program tokens, classifying strings of text by highlighting
158 /// category (`Class`).
159 struct Classifier<'a> {
160 tokens: Peekable<TokenIter<'a>>,
163 in_macro_nonterminal: bool,
169 impl<'a> Classifier<'a> {
170 fn new(src: &str, edition: Edition) -> Classifier<'_> {
171 let tokens = TokenIter { src }.peekable();
176 in_macro_nonterminal: false,
183 /// Concatenate colons and idents as one when possible.
184 fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> {
185 let start = self.byte_pos as usize;
187 let mut has_ident = false;
188 let edition = self.edition;
192 while let Some((TokenKind::Colon, _)) = self.tokens.peek() {
196 // Ident path can start with "::" but if we already have content in the ident path,
197 // the "::" is mandatory.
198 if has_ident && nb == 0 {
199 return vec![(TokenKind::Ident, start, pos)];
200 } else if nb != 0 && nb != 2 {
202 return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
204 return vec![(TokenKind::Colon, pos, pos + nb)];
208 if let Some((Class::Ident, text)) = self.tokens.peek().map(|(token, text)| {
209 if *token == TokenKind::Ident {
210 let class = get_real_ident_class(text, edition);
213 // Doesn't matter which Class we put in here...
214 (Class::Comment, text)
217 // We only "add" the colon if there is an ident behind.
218 pos += text.len() + nb;
221 } else if nb > 0 && has_ident {
222 return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
224 return vec![(TokenKind::Colon, pos, pos + nb)];
225 } else if has_ident {
226 return vec![(TokenKind::Ident, start, pos)];
233 /// Wraps the token iterator to ensure that `byte_pos` is always correct.
234 fn next(&mut self) -> Option<(TokenKind, &'a str)> {
235 if let Some((kind, text)) = self.tokens.next() {
236 self.byte_pos += text.len() as u32;
243 /// Exhausts the `Classifier` writing the output into `sink`.
245 /// The general structure for this method is to iterate over each token,
246 /// possibly giving it an HTML span with a class specifying what flavor of
248 fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'a>)) {
253 .map(|t| matches!(t.0, TokenKind::Colon | TokenKind::Ident))
256 let tokens = self.get_full_ident_path();
257 for (token, start, end) in tokens {
258 let text = &self.src[start..end];
259 self.advance(token, text, sink);
260 self.byte_pos += text.len() as u32;
263 if let Some((token, text)) = self.next() {
264 self.advance(token, text, sink);
271 /// Single step of highlighting. This will classify `token`, but maybe also
272 /// a couple of following ones as well.
273 fn advance(&mut self, token: TokenKind, text: &'a str, sink: &mut dyn FnMut(Highlight<'a>)) {
274 let lookahead = self.peek();
275 let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
276 let class = match token {
277 TokenKind::Whitespace => return no_highlight(sink),
278 TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
279 if doc_style.is_some() {
285 // Consider this as part of a macro invocation if there was a
286 // leading identifier.
287 TokenKind::Bang if self.in_macro => {
288 self.in_macro = false;
289 sink(Highlight::Token { text, class: None });
290 sink(Highlight::ExitSpan);
294 // Assume that '&' or '*' is the reference or dereference operator
295 // or a reference or pointer type. Unless, of course, it looks like
296 // a logical and or a multiplication operator: `&&` or `* `.
297 TokenKind::Star => match lookahead {
298 Some(TokenKind::Whitespace) => Class::Op,
299 _ => Class::RefKeyWord,
301 TokenKind::And => match lookahead {
302 Some(TokenKind::And) => {
304 sink(Highlight::Token { text: "&&", class: Some(Class::Op) });
307 Some(TokenKind::Eq) => {
309 sink(Highlight::Token { text: "&=", class: Some(Class::Op) });
312 Some(TokenKind::Whitespace) => Class::Op,
313 _ => Class::RefKeyWord,
326 | TokenKind::Gt => Class::Op,
328 // Miscellaneous, no highlighting.
332 | TokenKind::OpenParen
333 | TokenKind::CloseParen
334 | TokenKind::OpenBrace
335 | TokenKind::CloseBrace
336 | TokenKind::OpenBracket
340 | TokenKind::Unknown => return no_highlight(sink),
342 TokenKind::Question => Class::QuestionMark,
344 TokenKind::Dollar => match lookahead {
345 Some(TokenKind::Ident) => {
346 self.in_macro_nonterminal = true;
347 Class::MacroNonTerminal
349 _ => return no_highlight(sink),
352 // This might be the start of an attribute. We're going to want to
353 // continue highlighting it as an attribute until the ending ']' is
354 // seen, so skip out early. Down below we terminate the attribute
355 // span when we see the ']'.
356 TokenKind::Pound => {
358 // Case 1: #![inner_attribute]
359 Some(TokenKind::Bang) => {
361 if let Some(TokenKind::OpenBracket) = self.peek() {
362 self.in_attribute = true;
363 sink(Highlight::EnterSpan { class: Class::Attribute });
365 sink(Highlight::Token { text: "#", class: None });
366 sink(Highlight::Token { text: "!", class: None });
369 // Case 2: #[outer_attribute]
370 Some(TokenKind::OpenBracket) => {
371 self.in_attribute = true;
372 sink(Highlight::EnterSpan { class: Class::Attribute });
376 return no_highlight(sink);
378 TokenKind::CloseBracket => {
379 if self.in_attribute {
380 self.in_attribute = false;
381 sink(Highlight::Token { text: "]", class: None });
382 sink(Highlight::ExitSpan);
385 return no_highlight(sink);
387 TokenKind::Literal { kind, .. } => match kind {
389 LiteralKind::Byte { .. }
390 | LiteralKind::Char { .. }
391 | LiteralKind::Str { .. }
392 | LiteralKind::ByteStr { .. }
393 | LiteralKind::RawStr { .. }
394 | LiteralKind::RawByteStr { .. } => Class::String,
396 LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
398 TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
399 self.in_macro = true;
400 sink(Highlight::EnterSpan { class: Class::Macro });
401 sink(Highlight::Token { text, class: None });
404 TokenKind::Ident => match get_real_ident_class(text, self.edition) {
405 Class::Ident => match text {
406 "Option" | "Result" => Class::PreludeTy,
407 "Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
408 _ if self.in_macro_nonterminal => {
409 self.in_macro_nonterminal = false;
410 Class::MacroNonTerminal
416 TokenKind::RawIdent => Class::Ident,
417 TokenKind::Lifetime { .. } => Class::Lifetime,
419 // Anything that didn't return above is the simple case where the
420 // class just spans a single token, so we can emit a single `Highlight::Token`.
421 sink(Highlight::Token { text, class: Some(class) });
424 fn peek(&mut self) -> Option<TokenKind> {
425 self.tokens.peek().map(|(toke_kind, _text)| *toke_kind)
429 /// Called when we start processing a span of text that should be highlighted.
430 /// The `Class` argument specifies how it should be highlighted.
431 fn enter_span(out: &mut Buffer, klass: Class) {
432 write!(out, "<span class=\"{}\">", klass.as_html());
435 /// Called at the end of a span of highlighted text.
436 fn exit_span(out: &mut Buffer) {
437 out.write_str("</span>");
440 /// Called for a span of text. If the text should be highlighted differently
441 /// from the surrounding text, then the `Class` argument will be a value other
444 /// The following sequences of callbacks are equivalent:
446 /// enter_span(Foo), string("text", None), exit_span()
447 /// string("text", Foo)
449 /// The latter can be thought of as a shorthand for the former, which is more
451 fn string<T: Display>(out: &mut Buffer, text: T, klass: Option<Class>) {
453 None => write!(out, "{}", text),
454 Some(klass) => write!(out, "<span class=\"{}\">{}</span>", klass.as_html(), text),