1 //! Basic syntax highlighting functionality.
3 //! This module uses the `rustc_lexer` crate to provide token-based highlighting for
4 //! the HTML documentation generated by rustdoc.
6 //! Use `render_with_highlighting` to highlight some Rust code.
9 use crate::html::escape::Escape;
10 use crate::html::render::Context;
12 use std::fmt::{Display, Write};
13 use std::iter::Peekable;
15 use rustc_lexer::{LiteralKind, TokenKind};
16 use rustc_span::edition::Edition;
17 use rustc_span::symbol::Symbol;
19 use super::format::{self, Buffer};
20 use super::render::{LightSpan, LinkFromSrc};
22 /// This type is needed when we want to render links on items that lead to their definitions.
23 crate struct ContextInfo<'a, 'b, 'c> {
24 crate context: &'a Context<'b>,
25 /// This represents the "lo" bytes of the current file we're rendering. To get a [`Span`] from
26 /// it, you just need to add your current byte position in the string and its length (to get
29 /// This is used to create a [`LightSpan`] which is then used as an index in the `span_map` in
30 /// order to retrieve the definition's [`Span`] (which is used to generate the URL).
31 crate file_span_lo: u32,
32 /// This field is used to know "how far" from the top of the directory we are to link to either
33 /// documentation pages or other source pages.
34 crate root_path: &'c str,
37 /// Highlights `src`, returning the HTML output.
38 crate fn render_with_highlighting(
42 playground_button: Option<&str>,
43 tooltip: Option<(Option<Edition>, &str)>,
45 extra_content: Option<Buffer>,
46 context_info: Option<ContextInfo<'_, '_, '_>>,
48 debug!("highlighting: ================\n{}\n==============", src);
49 if let Some((edition_info, class)) = tooltip {
52 "<div class='information'><div class='tooltip {}'{}>ⓘ</div></div>",
54 if let Some(edition_info) = edition_info {
55 format!(" data-edition=\"{}\"", edition_info)
62 write_header(out, class, extra_content);
63 write_code(out, &src, edition, context_info);
64 write_footer(out, playground_button);
67 fn write_header(out: &mut Buffer, class: Option<&str>, extra_content: Option<Buffer>) {
68 write!(out, "<div class=\"example-wrap\">");
69 if let Some(extra) = extra_content {
70 out.push_buffer(extra);
72 if let Some(class) = class {
73 writeln!(out, "<pre class=\"rust {}\">", class);
75 writeln!(out, "<pre class=\"rust\">");
79 /// Convert the given `src` source code into HTML by adding classes for highlighting.
81 /// This code is used to render code blocks (in the documentation) as well as the source code pages.
83 /// Some explanations on the last arguments:
85 /// In case we are rendering a code block and not a source code file, `context_info` will be `None`.
86 /// To put it more simply: if `context_info` is `None`, the code won't try to generate links to an
89 /// More explanations about spans and how we use them here are provided in the
90 /// [`LightSpan::new_in_file`] function documentation about how it works.
95 context_info: Option<ContextInfo<'_, '_, '_>>,
97 // Normalize DOS line endings (`\r\n`) to `\n` so that such source code is displayed correctly.
98 let src = src.replace("\r\n", "\n");
99 Classifier::new(&src, edition, context_info.as_ref().map(|c| c.file_span_lo).unwrap_or(0))
100 .highlight(&mut |highlight| {
102 Highlight::Token { text, class } => string(out, Escape(text), class, &context_info),
103 Highlight::EnterSpan { class } => enter_span(out, class),
104 Highlight::ExitSpan => exit_span(out),
109 fn write_footer(out: &mut Buffer, playground_button: Option<&str>) {
110 writeln!(out, "</pre>{}</div>", playground_button.unwrap_or_default());
113 /// How a span of text is classified. Mostly corresponds to token kinds.
114 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
120 // Keywords that do pointer/reference stuff.
137 /// Returns the css class expected by rustdoc for each `Class`.
138 fn as_html(self) -> &'static str {
140 Class::Comment => "comment",
141 Class::DocComment => "doccomment",
142 Class::Attribute => "attribute",
143 Class::KeyWord => "kw",
144 Class::RefKeyWord => "kw-2",
145 Class::Self_(_) => "self",
147 Class::Macro => "macro",
148 Class::MacroNonTerminal => "macro-nonterminal",
149 Class::String => "string",
150 Class::Number => "number",
151 Class::Bool => "bool-val",
152 Class::Ident(_) => "ident",
153 Class::Lifetime => "lifetime",
154 Class::PreludeTy => "prelude-ty",
155 Class::PreludeVal => "prelude-val",
156 Class::QuestionMark => "question-mark",
160 /// In case this is an item which can be converted into a link to a definition, it'll contain
161 /// a "span" (a tuple representing `(lo, hi)` equivalent of `Span`).
162 fn get_span(self) -> Option<LightSpan> {
164 Self::Ident(sp) | Self::Self_(sp) => Some(sp),
171 Token { text: &'a str, class: Option<Class> },
172 EnterSpan { class: Class },
176 struct TokenIter<'a> {
180 impl Iterator for TokenIter<'a> {
181 type Item = (TokenKind, &'a str);
182 fn next(&mut self) -> Option<(TokenKind, &'a str)> {
183 if self.src.is_empty() {
186 let token = rustc_lexer::first_token(self.src);
187 let (text, rest) = self.src.split_at(token.len);
189 Some((token.kind, text))
193 /// Classifies into identifier class; returns `None` if this is a non-keyword identifier.
194 fn get_real_ident_class(text: &str, edition: Edition, allow_path_keywords: bool) -> Option<Class> {
195 let ignore: &[&str] =
196 if allow_path_keywords { &["self", "Self", "super", "crate"] } else { &["self", "Self"] };
197 if ignore.iter().any(|k| *k == text) {
201 "ref" | "mut" => Class::RefKeyWord,
202 "false" | "true" => Class::Bool,
203 _ if Symbol::intern(text).is_reserved(|| edition) => Class::KeyWord,
208 /// Processes program tokens, classifying strings of text by highlighting
209 /// category (`Class`).
210 struct Classifier<'a> {
211 tokens: Peekable<TokenIter<'a>>,
214 in_macro_nonterminal: bool,
221 impl<'a> Classifier<'a> {
222 /// Takes as argument the source code to HTML-ify, the rust edition to use and the source code
223 /// file "lo" byte which will be used later on by the `span_correspondance_map`. More explanations
224 /// are provided in the [`LightSpan::new_in_file`] function documentation about how it works.
225 fn new(src: &str, edition: Edition, file_span_lo: u32) -> Classifier<'_> {
226 let tokens = TokenIter { src }.peekable();
231 in_macro_nonterminal: false,
239 /// Concatenate colons and idents as one when possible.
240 fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> {
241 let start = self.byte_pos as usize;
243 let mut has_ident = false;
244 let edition = self.edition;
248 while let Some((TokenKind::Colon, _)) = self.tokens.peek() {
252 // Ident path can start with "::" but if we already have content in the ident path,
253 // the "::" is mandatory.
254 if has_ident && nb == 0 {
255 return vec![(TokenKind::Ident, start, pos)];
256 } else if nb != 0 && nb != 2 {
258 return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
260 return vec![(TokenKind::Colon, start, pos + nb)];
264 if let Some((None, text)) = self.tokens.peek().map(|(token, text)| {
265 if *token == TokenKind::Ident {
266 let class = get_real_ident_class(text, edition, true);
269 // Doesn't matter which Class we put in here...
270 (Some(Class::Comment), text)
273 // We only "add" the colon if there is an ident behind.
274 pos += text.len() + nb;
277 } else if nb > 0 && has_ident {
278 return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
280 return vec![(TokenKind::Colon, start, start + nb)];
281 } else if has_ident {
282 return vec![(TokenKind::Ident, start, pos)];
289 /// Wraps the tokens iteration to ensure that the `byte_pos` is always correct.
291 /// It returns the token's kind, the token as a string and its byte position in the source
293 fn next(&mut self) -> Option<(TokenKind, &'a str, u32)> {
294 if let Some((kind, text)) = self.tokens.next() {
295 let before = self.byte_pos;
296 self.byte_pos += text.len() as u32;
297 Some((kind, text, before))
303 /// Exhausts the `Classifier` writing the output into `sink`.
305 /// The general structure for this method is to iterate over each token,
306 /// possibly giving it an HTML span with a class specifying what flavor of
308 fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'a>)) {
313 .map(|t| matches!(t.0, TokenKind::Colon | TokenKind::Ident))
316 let tokens = self.get_full_ident_path();
317 let skip = !tokens.is_empty();
318 for (token, start, end) in tokens {
319 let text = &self.src[start..end];
320 self.advance(token, text, sink, start as u32);
321 self.byte_pos += text.len() as u32;
327 if let Some((token, text, before)) = self.next() {
328 self.advance(token, text, sink, before);
335 /// Single step of highlighting. This will classify `token`, but maybe also a couple of
336 /// following ones as well.
338 /// `before` is the position of the given token in the `source` string and is used as "lo" byte
339 /// in case we want to try to generate a link for this token using the
340 /// `span_correspondance_map`.
345 sink: &mut dyn FnMut(Highlight<'a>),
348 let lookahead = self.peek();
349 let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
350 let class = match token {
351 TokenKind::Whitespace => return no_highlight(sink),
352 TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
353 if doc_style.is_some() {
359 // Consider this as part of a macro invocation if there was a
360 // leading identifier.
361 TokenKind::Bang if self.in_macro => {
362 self.in_macro = false;
363 sink(Highlight::Token { text, class: None });
364 sink(Highlight::ExitSpan);
368 // Assume that '&' or '*' is the reference or dereference operator
369 // or a reference or pointer type. Unless, of course, it looks like
370 // a logical and or a multiplication operator: `&&` or `* `.
371 TokenKind::Star => match lookahead {
372 Some(TokenKind::Whitespace) => Class::Op,
373 _ => Class::RefKeyWord,
375 TokenKind::And => match lookahead {
376 Some(TokenKind::And) => {
378 sink(Highlight::Token { text: "&&", class: Some(Class::Op) });
381 Some(TokenKind::Eq) => {
383 sink(Highlight::Token { text: "&=", class: Some(Class::Op) });
386 Some(TokenKind::Whitespace) => Class::Op,
387 _ => Class::RefKeyWord,
400 | TokenKind::Gt => Class::Op,
402 // Miscellaneous, no highlighting.
406 | TokenKind::OpenParen
407 | TokenKind::CloseParen
408 | TokenKind::OpenBrace
409 | TokenKind::CloseBrace
410 | TokenKind::OpenBracket
414 | TokenKind::Unknown => return no_highlight(sink),
416 TokenKind::Question => Class::QuestionMark,
418 TokenKind::Dollar => match lookahead {
419 Some(TokenKind::Ident) => {
420 self.in_macro_nonterminal = true;
421 Class::MacroNonTerminal
423 _ => return no_highlight(sink),
426 // This might be the start of an attribute. We're going to want to
427 // continue highlighting it as an attribute until the ending ']' is
428 // seen, so skip out early. Down below we terminate the attribute
429 // span when we see the ']'.
430 TokenKind::Pound => {
432 // Case 1: #![inner_attribute]
433 Some(TokenKind::Bang) => {
435 if let Some(TokenKind::OpenBracket) = self.peek() {
436 self.in_attribute = true;
437 sink(Highlight::EnterSpan { class: Class::Attribute });
439 sink(Highlight::Token { text: "#", class: None });
440 sink(Highlight::Token { text: "!", class: None });
443 // Case 2: #[outer_attribute]
444 Some(TokenKind::OpenBracket) => {
445 self.in_attribute = true;
446 sink(Highlight::EnterSpan { class: Class::Attribute });
450 return no_highlight(sink);
452 TokenKind::CloseBracket => {
453 if self.in_attribute {
454 self.in_attribute = false;
455 sink(Highlight::Token { text: "]", class: None });
456 sink(Highlight::ExitSpan);
459 return no_highlight(sink);
461 TokenKind::Literal { kind, .. } => match kind {
463 LiteralKind::Byte { .. }
464 | LiteralKind::Char { .. }
465 | LiteralKind::Str { .. }
466 | LiteralKind::ByteStr { .. }
467 | LiteralKind::RawStr { .. }
468 | LiteralKind::RawByteStr { .. } => Class::String,
470 LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
472 TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
473 self.in_macro = true;
474 sink(Highlight::EnterSpan { class: Class::Macro });
475 sink(Highlight::Token { text, class: None });
478 TokenKind::Ident => match get_real_ident_class(text, self.edition, false) {
480 "Option" | "Result" => Class::PreludeTy,
481 "Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
482 _ if self.in_macro_nonterminal => {
483 self.in_macro_nonterminal = false;
484 Class::MacroNonTerminal
486 "self" | "Self" => Class::Self_(LightSpan::new_in_file(
489 before + text.len() as u32,
491 _ => Class::Ident(LightSpan::new_in_file(
494 before + text.len() as u32,
499 TokenKind::RawIdent | TokenKind::UnknownPrefix => Class::Ident(LightSpan::new_in_file(
502 before + text.len() as u32,
504 TokenKind::Lifetime { .. } => Class::Lifetime,
506 // Anything that didn't return above is the simple case where the
507 // class just spans a single token, so we can use the `string` method.
508 sink(Highlight::Token { text, class: Some(class) });
511 fn peek(&mut self) -> Option<TokenKind> {
512 self.tokens.peek().map(|(toke_kind, _text)| *toke_kind)
516 /// Called when we start processing a span of text that should be highlighted.
517 /// The `Class` argument specifies how it should be highlighted.
518 fn enter_span(out: &mut Buffer, klass: Class) {
519 write!(out, "<span class=\"{}\">", klass.as_html());
522 /// Called at the end of a span of highlighted text.
523 fn exit_span(out: &mut Buffer) {
524 out.write_str("</span>");
527 /// Called for a span of text. If the text should be highlighted differently
528 /// from the surrounding text, then the `Class` argument will be a value other
531 /// The following sequences of callbacks are equivalent:
533 /// enter_span(Foo), string("text", None), exit_span()
534 /// string("text", Foo)
537 /// The latter can be thought of as a shorthand for the former, which is more
540 /// Note that if `context_info` is not `None` and the given `klass` contains a `Span`, the function
541 /// will then try to find this `span` in the `span_correspondance_map`. If found, it'll then
542 /// generate a link for this element (which corresponds to where its definition is located).
543 fn string<T: Display>(
546 klass: Option<Class>,
547 context_info: &Option<ContextInfo<'_, '_, '_>>,
549 let klass = match klass {
550 None => return write!(out, "{}", text),
551 Some(klass) => klass,
553 if let Some(def_span) = klass.get_span() {
554 let mut text = text.to_string();
555 if text.contains("::") {
556 text = text.split("::").intersperse("::").fold(String::new(), |mut path, t| {
558 "self" | "Self" => write!(
560 "<span class=\"{}\">{}</span>",
561 Class::Self_(LightSpan::empty()).as_html(),
564 "crate" | "super" => write!(
566 "<span class=\"{}\">{}</span>",
567 Class::KeyWord.as_html(),
570 t => write!(&mut path, "{}", t),
572 .expect("Failed to build source HTML path");
576 if let Some(context_info) = context_info {
577 if let Some(href) = context_info
580 .span_correspondance_map
583 let context = context_info.context;
585 LinkFromSrc::Local(span) => {
587 .href_from_span(clean::Span::wrap_raw(*span))
588 .map(|s| format!("{}{}", context_info.root_path, s))
590 LinkFromSrc::External(def_id) => {
591 format::href(*def_id, context).map(|(url, _, _)| url)
596 write!(out, "<a class=\"{}\" href=\"{}\">{}</a>", klass.as_html(), href, text);
601 write!(out, "<span class=\"{}\">{}</span>", klass.as_html(), text);