1 //! Basic syntax highlighting functionality.
3 //! This module uses the `rustc_lexer` crate to provide token-based highlighting for
4 //! the HTML documentation generated by rustdoc.
6 //! Use `render_with_highlighting` to highlight some Rust code.
8 use crate::clean::PrimitiveType;
9 use crate::html::escape::Escape;
10 use crate::html::render::Context;
12 use std::collections::VecDeque;
13 use std::fmt::{Display, Write};
15 use rustc_lexer::{LiteralKind, TokenKind};
16 use rustc_span::edition::Edition;
17 use rustc_span::symbol::Symbol;
18 use rustc_span::{BytePos, Span, DUMMY_SP};
20 use super::format::{self, Buffer};
21 use super::render::LinkFromSrc;
23 /// This type is needed when we want to render links on items that lead to their definition.
24 crate struct ContextInfo<'a, 'b, 'c> {
25 crate context: &'a Context<'b>,
26 /// This span contains the current file we're going through.
27 crate file_span: Span,
28 /// This field is used to know "how far" from the top of the directory we are to link to either
29 /// documentation pages or other source pages.
30 crate root_path: &'c str,
33 /// Highlights `src`, returning the HTML output.
34 crate fn render_with_highlighting(
38 playground_button: Option<&str>,
39 tooltip: Option<(Option<Edition>, &str)>,
41 extra_content: Option<Buffer>,
42 context_info: Option<ContextInfo<'_, '_, '_>>,
44 debug!("highlighting: ================\n{}\n==============", src);
45 if let Some((edition_info, class)) = tooltip {
48 "<div class='information'><div class='tooltip {}'{}>ⓘ</div></div>",
50 if let Some(edition_info) = edition_info {
51 format!(" data-edition=\"{}\"", edition_info)
58 write_header(out, class, extra_content);
59 write_code(out, &src, edition, context_info);
60 write_footer(out, playground_button);
63 fn write_header(out: &mut Buffer, class: Option<&str>, extra_content: Option<Buffer>) {
64 write!(out, "<div class=\"example-wrap\">");
65 if let Some(extra) = extra_content {
66 out.push_buffer(extra);
68 if let Some(class) = class {
69 write!(out, "<pre class=\"rust {}\">", class);
71 write!(out, "<pre class=\"rust\">");
73 write!(out, "<code>");
76 /// Convert the given `src` source code into HTML by adding classes for highlighting.
78 /// This code is used to render code blocks (in the documentation) as well as the source code pages.
80 /// Some explanations on the last arguments:
82 /// In case we are rendering a code block and not a source code file, `context_info` will be `None`.
83 /// To put it more simply: if `context_info` is `None`, the code won't try to generate links to an
86 /// More explanations about spans and how we use them here are provided in the
91 context_info: Option<ContextInfo<'_, '_, '_>>,
93 // Normalize DOS line endings (`\r\n`) so that source code using them is displayed correctly.
94 let src = src.replace("\r\n", "\n");
95 Classifier::new(&src, edition, context_info.as_ref().map(|c| c.file_span).unwrap_or(DUMMY_SP))
96 .highlight(&mut |highlight| {
98 Highlight::Token { text, class } => string(out, Escape(text), class, &context_info),
99 Highlight::EnterSpan { class } => enter_span(out, class),
100 Highlight::ExitSpan => exit_span(out),
105 fn write_footer(out: &mut Buffer, playground_button: Option<&str>) {
106 writeln!(out, "</code></pre>{}</div>", playground_button.unwrap_or_default());
109 /// How a span of text is classified. Mostly corresponds to token kinds.
110 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
116 // Keywords that do pointer/reference stuff.
133 /// Returns the css class expected by rustdoc for each `Class`.
134 fn as_html(self) -> &'static str {
136 Class::Comment => "comment",
137 Class::DocComment => "doccomment",
138 Class::Attribute => "attribute",
139 Class::KeyWord => "kw",
140 Class::RefKeyWord => "kw-2",
141 Class::Self_(_) => "self",
143 Class::Macro => "macro",
144 Class::MacroNonTerminal => "macro-nonterminal",
145 Class::String => "string",
146 Class::Number => "number",
147 Class::Bool => "bool-val",
148 Class::Ident(_) => "ident",
149 Class::Lifetime => "lifetime",
150 Class::PreludeTy => "prelude-ty",
151 Class::PreludeVal => "prelude-val",
152 Class::QuestionMark => "question-mark",
156 /// In case this is an item which can be converted into a link to a definition, it'll contain
157 /// a [`Span`].
158 fn get_span(self) -> Option<Span> {
160 Self::Ident(sp) | Self::Self_(sp) => Some(sp),
167 Token { text: &'a str, class: Option<Class> },
168 EnterSpan { class: Class },
172 struct TokenIter<'a> {
176 impl Iterator for TokenIter<'a> {
177 type Item = (TokenKind, &'a str);
178 fn next(&mut self) -> Option<(TokenKind, &'a str)> {
179 if self.src.is_empty() {
182 let token = rustc_lexer::first_token(self.src);
183 let (text, rest) = self.src.split_at(token.len);
185 Some((token.kind, text))
189 /// Classifies into identifier class; returns `None` if this is a non-keyword identifier.
190 fn get_real_ident_class(text: &str, edition: Edition, allow_path_keywords: bool) -> Option<Class> {
191 let ignore: &[&str] =
192 if allow_path_keywords { &["self", "Self", "super", "crate"] } else { &["self", "Self"] };
193 if ignore.iter().any(|k| *k == text) {
197 "ref" | "mut" => Class::RefKeyWord,
198 "false" | "true" => Class::Bool,
199 _ if Symbol::intern(text).is_reserved(|| edition) => Class::KeyWord,
204 /// This iterator follows the same idea as "Peekable" except that it can "peek" further than
205 /// just the next item by using `peek_next`. The `peek` method always returns the next item after
206 /// the current one whereas `peek_next` will return the next item after the last one peeked.
208 /// You can use both `peek` and `peek_next` at the same time without problem.
209 struct PeekIter<'a> {
210 stored: VecDeque<(TokenKind, &'a str)>,
211 /// This position is reinitialized when using `next`. It is used in `peek_next`.
217 fn new(iter: TokenIter<'a>) -> Self {
218 Self { stored: VecDeque::new(), peek_pos: 0, iter }
220 /// Returns the next item after the current one. It doesn't interfere with `peek_next` output.
221 fn peek(&mut self) -> Option<&(TokenKind, &'a str)> {
222 if self.stored.is_empty() {
223 if let Some(next) = self.iter.next() {
224 self.stored.push_back(next);
229 /// Returns the next item after the last one peeked. It doesn't interfere with `peek` output.
230 fn peek_next(&mut self) -> Option<&(TokenKind, &'a str)> {
232 if self.peek_pos - 1 < self.stored.len() {
233 self.stored.get(self.peek_pos - 1)
234 } else if let Some(next) = self.iter.next() {
235 self.stored.push_back(next);
243 impl Iterator for PeekIter<'a> {
244 type Item = (TokenKind, &'a str);
245 fn next(&mut self) -> Option<Self::Item> {
247 if let Some(first) = self.stored.pop_front() { Some(first) } else { self.iter.next() }
251 /// Processes program tokens, classifying strings of text by highlighting
252 /// category (`Class`).
253 struct Classifier<'a> {
254 tokens: PeekIter<'a>,
257 in_macro_nonterminal: bool,
264 impl<'a> Classifier<'a> {
265 /// Takes as argument the source code to HTML-ify, the rust edition to use and the source code
266 /// file span which will be used later on by the `span_correspondance_map`.
267 fn new(src: &str, edition: Edition, file_span: Span) -> Classifier<'_> {
268 let tokens = PeekIter::new(TokenIter { src });
273 in_macro_nonterminal: false,
281 /// Convenient wrapper to create a [`Span`] from a position in the file.
282 fn new_span(&self, lo: u32, text: &str) -> Span {
283 let hi = lo + text.len() as u32;
284 let file_lo = self.file_span.lo();
285 self.file_span.with_lo(file_lo + BytePos(lo)).with_hi(file_lo + BytePos(hi))
288 /// Concatenate colons and idents as one when possible.
289 fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> {
290 let start = self.byte_pos as usize;
292 let mut has_ident = false;
293 let edition = self.edition;
297 while let Some((TokenKind::Colon, _)) = self.tokens.peek() {
301 // Ident path can start with "::" but if we already have content in the ident path,
302 // the "::" is mandatory.
303 if has_ident && nb == 0 {
304 return vec![(TokenKind::Ident, start, pos)];
305 } else if nb != 0 && nb != 2 {
307 return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
309 return vec![(TokenKind::Colon, start, pos + nb)];
313 if let Some((None, text)) = self.tokens.peek().map(|(token, text)| {
314 if *token == TokenKind::Ident {
315 let class = get_real_ident_class(text, edition, true);
318 // Doesn't matter which Class we put in here...
319 (Some(Class::Comment), text)
322 // We only "add" the colon if there is an ident behind.
323 pos += text.len() + nb;
326 } else if nb > 0 && has_ident {
327 return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
329 return vec![(TokenKind::Colon, start, start + nb)];
330 } else if has_ident {
331 return vec![(TokenKind::Ident, start, pos)];
338 /// Wraps the tokens iteration to ensure that the `byte_pos` is always correct.
340 /// It returns the token's kind, the token as a string and its byte position in the source
342 fn next(&mut self) -> Option<(TokenKind, &'a str, u32)> {
343 if let Some((kind, text)) = self.tokens.next() {
344 let before = self.byte_pos;
345 self.byte_pos += text.len() as u32;
346 Some((kind, text, before))
352 /// Exhausts the `Classifier` writing the output into `sink`.
354 /// The general structure for this method is to iterate over each token,
355 /// possibly giving it an HTML span with a class specifying what flavor of
357 fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'a>)) {
362 .map(|t| matches!(t.0, TokenKind::Colon | TokenKind::Ident))
365 let tokens = self.get_full_ident_path();
366 for (token, start, end) in &tokens {
367 let text = &self.src[*start..*end];
368 self.advance(*token, text, sink, *start as u32);
369 self.byte_pos += text.len() as u32;
371 if !tokens.is_empty() {
375 if let Some((token, text, before)) = self.next() {
376 self.advance(token, text, sink, before);
383 /// Single step of highlighting. This will classify `token`, but maybe also a couple of
384 /// following ones as well.
386 /// `before` is the position of the given token in the `source` string and is used as "lo" byte
387 /// in case we want to try to generate a link for this token using the
388 /// `span_correspondance_map`.
393 sink: &mut dyn FnMut(Highlight<'a>),
396 let lookahead = self.peek();
397 let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
398 let class = match token {
399 TokenKind::Whitespace => return no_highlight(sink),
400 TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
401 if doc_style.is_some() {
407 // Consider this as part of a macro invocation if there was a
408 // leading identifier.
409 TokenKind::Bang if self.in_macro => {
410 self.in_macro = false;
411 sink(Highlight::Token { text, class: None });
412 sink(Highlight::ExitSpan);
416 // Assume that '&' or '*' is the reference or dereference operator
417 // or a reference or pointer type. Unless, of course, it looks like
418 // a logical and or a multiplication operator: `&&` or `* `.
419 TokenKind::Star => match self.peek() {
420 Some(TokenKind::Whitespace) => Class::Op,
421 _ => Class::RefKeyWord,
423 TokenKind::And => match lookahead {
424 Some(TokenKind::And) => {
426 sink(Highlight::Token { text: "&&", class: Some(Class::Op) });
429 Some(TokenKind::Eq) => {
431 sink(Highlight::Token { text: "&=", class: Some(Class::Op) });
434 Some(TokenKind::Whitespace) => Class::Op,
435 _ => Class::RefKeyWord,
438 // These can either be operators, or arrows.
439 TokenKind::Eq => match lookahead {
440 Some(TokenKind::Eq) => {
442 sink(Highlight::Token { text: "==", class: Some(Class::Op) });
445 Some(TokenKind::Gt) => {
447 sink(Highlight::Token { text: "=>", class: None });
452 TokenKind::Minus if lookahead == Some(TokenKind::Gt) => {
454 sink(Highlight::Token { text: "->", class: None });
467 | TokenKind::Gt => Class::Op,
469 // Miscellaneous, no highlighting.
473 | TokenKind::OpenParen
474 | TokenKind::CloseParen
475 | TokenKind::OpenBrace
476 | TokenKind::CloseBrace
477 | TokenKind::OpenBracket
481 | TokenKind::Unknown => return no_highlight(sink),
483 TokenKind::Question => Class::QuestionMark,
485 TokenKind::Dollar => match lookahead {
486 Some(TokenKind::Ident) => {
487 self.in_macro_nonterminal = true;
488 Class::MacroNonTerminal
490 _ => return no_highlight(sink),
493 // This might be the start of an attribute. We're going to want to
494 // continue highlighting it as an attribute until the ending ']' is
495 // seen, so skip out early. Down below we terminate the attribute
496 // span when we see the ']'.
497 TokenKind::Pound => {
499 // Case 1: #![inner_attribute]
500 Some(TokenKind::Bang) => {
502 if let Some(TokenKind::OpenBracket) = self.peek() {
503 self.in_attribute = true;
504 sink(Highlight::EnterSpan { class: Class::Attribute });
506 sink(Highlight::Token { text: "#", class: None });
507 sink(Highlight::Token { text: "!", class: None });
510 // Case 2: #[outer_attribute]
511 Some(TokenKind::OpenBracket) => {
512 self.in_attribute = true;
513 sink(Highlight::EnterSpan { class: Class::Attribute });
517 return no_highlight(sink);
519 TokenKind::CloseBracket => {
520 if self.in_attribute {
521 self.in_attribute = false;
522 sink(Highlight::Token { text: "]", class: None });
523 sink(Highlight::ExitSpan);
526 return no_highlight(sink);
528 TokenKind::Literal { kind, .. } => match kind {
530 LiteralKind::Byte { .. }
531 | LiteralKind::Char { .. }
532 | LiteralKind::Str { .. }
533 | LiteralKind::ByteStr { .. }
534 | LiteralKind::RawStr { .. }
535 | LiteralKind::RawByteStr { .. } => Class::String,
537 LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
539 TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
540 self.in_macro = true;
541 sink(Highlight::EnterSpan { class: Class::Macro });
542 sink(Highlight::Token { text, class: None });
545 TokenKind::Ident => match get_real_ident_class(text, self.edition, false) {
547 "Option" | "Result" => Class::PreludeTy,
548 "Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
549 // "union" is a weak keyword and is only considered as a keyword when declaring
551 "union" if self.check_if_is_union_keyword() => Class::KeyWord,
552 _ if self.in_macro_nonterminal => {
553 self.in_macro_nonterminal = false;
554 Class::MacroNonTerminal
556 "self" | "Self" => Class::Self_(self.new_span(before, text)),
557 _ => Class::Ident(self.new_span(before, text)),
561 TokenKind::RawIdent | TokenKind::UnknownPrefix => {
562 Class::Ident(self.new_span(before, text))
564 TokenKind::Lifetime { .. } => Class::Lifetime,
566 // Anything that didn't return above is the simple case where the
567 // class just spans a single token, so we can use the `string` method.
568 sink(Highlight::Token { text, class: Some(class) });
571 fn peek(&mut self) -> Option<TokenKind> {
572 self.tokens.peek().map(|(token_kind, _text)| *token_kind)
575 fn check_if_is_union_keyword(&mut self) -> bool {
576 while let Some(kind) = self.tokens.peek_next().map(|(token_kind, _text)| token_kind) {
577 if *kind == TokenKind::Whitespace {
580 return *kind == TokenKind::Ident;
586 /// Called when we start processing a span of text that should be highlighted.
587 /// The `Class` argument specifies how it should be highlighted.
588 fn enter_span(out: &mut Buffer, klass: Class) {
589 write!(out, "<span class=\"{}\">", klass.as_html());
592 /// Called at the end of a span of highlighted text.
593 fn exit_span(out: &mut Buffer) {
594 out.write_str("</span>");
597 /// Called for a span of text. If the text should be highlighted differently
598 /// from the surrounding text, then the `Class` argument will be a value other
601 /// The following sequences of callbacks are equivalent:
603 /// enter_span(Foo), string("text", None), exit_span()
604 /// string("text", Foo)
607 /// The latter can be thought of as a shorthand for the former, which is more
610 /// Note that if `context_info` is not `None` and the given `klass` contains a `Span`, the function
611 /// will then try to find this `span` in the `span_correspondance_map`. If found, it'll then
612 /// generate a link for this element (which corresponds to where its definition is located).
613 fn string<T: Display>(
616 klass: Option<Class>,
617 context_info: &Option<ContextInfo<'_, '_, '_>>,
619 let klass = match klass {
620 None => return write!(out, "{}", text),
621 Some(klass) => klass,
623 let def_span = match klass.get_span() {
626 write!(out, "<span class=\"{}\">{}</span>", klass.as_html(), text);
630 let mut text_s = text.to_string();
631 if text_s.contains("::") {
632 text_s = text_s.split("::").intersperse("::").fold(String::new(), |mut path, t| {
634 "self" | "Self" => write!(
636 "<span class=\"{}\">{}</span>",
637 Class::Self_(DUMMY_SP).as_html(),
640 "crate" | "super" => {
641 write!(&mut path, "<span class=\"{}\">{}</span>", Class::KeyWord.as_html(), t)
643 t => write!(&mut path, "{}", t),
645 .expect("Failed to build source HTML path");
649 if let Some(context_info) = context_info {
651 context_info.context.shared.span_correspondance_map.get(&def_span).and_then(|href| {
652 let context = context_info.context;
653 // FIXME: later on, it'd be nice to provide two links (if possible) for all items:
654 // one to the documentation page and one to the source definition.
655 // FIXME: currently, external items only generate a link to their documentation,
656 // a link to their definition can be generated using this:
657 // https://github.com/rust-lang/rust/blob/60f1a2fc4b535ead9c85ce085fdce49b1b097531/src/librustdoc/html/render/context.rs#L315-L338
659 LinkFromSrc::Local(span) => context
660 .href_from_span(*span)
661 .map(|s| format!("{}{}", context_info.root_path, s)),
662 LinkFromSrc::External(def_id) => {
663 format::href_with_root_path(*def_id, context, Some(context_info.root_path))
665 .map(|(url, _, _)| url)
667 LinkFromSrc::Primitive(prim) => format::href_with_root_path(
668 PrimitiveType::primitive_locations(context.tcx())[&prim],
670 Some(context_info.root_path),
673 .map(|(url, _, _)| url),
677 write!(out, "<a class=\"{}\" href=\"{}\">{}</a>", klass.as_html(), href, text_s);
681 write!(out, "<span class=\"{}\">{}</span>", klass.as_html(), text_s);