1 //! Basic syntax highlighting functionality.
3 //! This module uses the `rustc_lexer` crate to provide token-based highlighting for
4 //! the HTML documentation generated by rustdoc.
6 //! Use `render_with_highlighting` to highlight some Rust code.
8 use crate::html::escape::Escape;
10 use std::fmt::Display;
11 use std::iter::Peekable;
13 use rustc_lexer::{LiteralKind, TokenKind};
14 use rustc_span::edition::Edition;
15 use rustc_span::symbol::Symbol;
16 use rustc_span::with_default_session_globals;
18 use super::format::Buffer;
20 /// Highlights `src`, returning the HTML output.
///
/// When `tooltip` is `Some`, an `information` block containing the tooltip marker is
/// emitted first. The highlighted code itself is then produced by three calls, in
/// order: `write_header`, `write_code` and `write_footer`, all writing to `out`.
///
/// NOTE(review): only part of the signature is visible in this excerpt; confirm
/// whether the HTML is returned to the caller or only accumulated in `out`.
21 crate fn render_with_highlighting(
// Optional playground button string; forwarded unchanged to `write_footer`.
25 playground_button: Option<&str>,
// Optional tooltip data: an optional edition paired with a string that is bound to
// `class` below (presumably the tooltip's CSS class -- confirm against the callers).
26 tooltip: Option<(Option<Edition>, &str)>,
29 debug!("highlighting: ================\n{}\n==============", src);
30 if let Some((edition_info, class)) = tooltip {
33 "<div class='information'><div class='tooltip {}'{}>ⓘ</div></div>",
// The `data-edition` attribute text is only built when an edition was supplied.
35 if let Some(edition_info) = edition_info {
36 format!(" data-edition=\"{}\"", edition_info)
// Opening wrapper, highlighted tokens, then closing wrapper (with the optional button).
43 write_header(out, class);
44 write_code(out, &src, edition);
45 write_footer(out, playground_button);
/// Writes the opening wrapper markup of a highlighted code block to `out`.
///
/// `class` is placed after `rust` in the `<pre>` element's class list; `None` yields an
/// empty string through `unwrap_or_default`.
48 fn write_header(out: &mut Buffer, class: Option<&str>) {
// Opens the `example-wrap` `<div>` and the `<pre>`; `write_footer` writes the closing tags.
49 write!(out, "<div class=\"example-wrap\"><pre class=\"rust {}\">\n", class.unwrap_or_default());
/// Tokenizes `src` and writes its highlighted form to `out`.
///
/// Every `Highlight` event produced by `Classifier::highlight` is forwarded to one of
/// `string`, `enter_span` or `exit_span`.
52 fn write_code(out: &mut Buffer, src: &str, edition: Edition) {
53 // Normalize DOS line endings (`\r\n`) to `\n` so that such sources are displayed correctly.
54 let src = src.replace("\r\n", "\n");
55 Classifier::new(&src, edition).highlight(&mut |highlight| {
// Token text is wrapped in `Escape` before being written (presumably HTML escaping --
// confirm in `crate::html::escape`).
57 Highlight::Token { text, class } => string(out, Escape(text), class),
58 Highlight::EnterSpan { class } => enter_span(out, class),
59 Highlight::ExitSpan => exit_span(out),
/// Writes the closing markup of a highlighted code block to `out`.
///
/// `playground_button` is inserted between `</pre>` and `</div>`; `None` inserts an empty
/// string through `unwrap_or_default`.
64 fn write_footer(out: &mut Buffer, playground_button: Option<&str>) {
// The button string is formatted with `{}` as given; it is not wrapped in `Escape` here.
65 write!(out, "</pre>{}</div>\n", playground_button.unwrap_or_default());
68 /// How a span of text is classified. Mostly corresponds to token kinds.
69 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
75 // Keywords that do pointer/reference stuff.
92 /// Returns the CSS class expected by rustdoc for each `Class`.
///
/// The returned string is what `enter_span` and `string` place in the `class` attribute
/// of the `<span>` elements they write.
93 fn as_html(self) -> &'static str {
95 Class::Comment => "comment",
96 Class::DocComment => "doccomment",
97 Class::Attribute => "attribute",
98 Class::KeyWord => "kw",
99 Class::RefKeyWord => "kw-2",
100 Class::Self_ => "self",
102 Class::Macro => "macro",
103 Class::MacroNonTerminal => "macro-nonterminal",
104 Class::String => "string",
105 Class::Number => "number",
106 Class::Bool => "bool-val",
107 Class::Ident => "ident",
108 Class::Lifetime => "lifetime",
109 Class::PreludeTy => "prelude-ty",
110 Class::PreludeVal => "prelude-val",
111 Class::QuestionMark => "question-mark",
/// A piece of source text together with the class to highlight it with, if any.
117 Token { text: &'a str, class: Option<Class> },
/// Start of a region highlighted as `class`; it stays open until a matching `ExitSpan`.
118 EnterSpan { class: Class },
/// Iterator over the tokens of a source string, lexed one at a time with
/// `rustc_lexer::first_token`.
122 struct TokenIter<'a> {
126 impl Iterator for TokenIter<'a> {
// Each item pairs a token kind with the slice of source text that the token covers.
127 type Item = (TokenKind, &'a str);
/// Lexes the token at the front of `self.src` and yields its kind and text.
128 fn next(&mut self) -> Option<(TokenKind, &'a str)> {
// Checked before lexing: an empty source has no further token to produce.
129 if self.src.is_empty() {
132 let token = rustc_lexer::first_token(self.src);
// `token.len` is the split offset: `text` is the token itself, `rest` what follows it.
133 let (text, rest) = self.src.split_at(token.len);
135 Some((token.kind, text))
139 /// Processes program tokens, classifying strings of text by highlighting
140 /// category (`Class`).
141 struct Classifier<'a> {
// Token stream; peekable so that `peek` can inspect the next token kind without
// consuming it.
142 tokens: Peekable<TokenIter<'a>>,
// Set by `advance` when `$` is directly followed by an identifier, and cleared by the
// identifier arm that yields `Class::MacroNonTerminal`.
145 in_macro_nonterminal: bool,
149 impl<'a> Classifier<'a> {
/// Builds a `Classifier` that reads its tokens from `src`.
150 fn new(src: &str, edition: Edition) -> Classifier<'_> {
// Wrapped in `Peekable` so that one token of lookahead is available to `peek`.
151 let tokens = TokenIter { src }.peekable();
// Starts cleared: no `$` has been seen yet.
156 in_macro_nonterminal: false,
161 /// Exhausts the `Classifier` writing the output into `sink`.
163 /// The general structure for this method is to iterate over each token,
164 /// possibly giving it an HTML span with a class specifying what flavor of
166 fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'a>)) {
// NOTE(review): presumably required because `advance` calls `Symbol::intern` -- confirm.
167 with_default_session_globals(|| {
// `while let` rather than `for`: `advance` takes `&mut self` and may itself pull
// additional tokens out of `self.tokens`.
168 while let Some((token, text)) = self.tokens.next() {
169 self.advance(token, text, sink);
174 /// Single step of highlighting. This will classify `token`, but maybe also
175 /// a couple of following ones as well.
176 fn advance(&mut self, token: TokenKind, text: &'a str, sink: &mut dyn FnMut(Highlight<'a>)) {
// Kind of the next token, obtained through `peek` so that it is not consumed.
177 let lookahead = self.peek();
// Emits the current token's text with no class attached.
178 let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
179 let class = match token {
180 TokenKind::Whitespace => return no_highlight(sink),
181 TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
// Presumably selects between `Class::DocComment` and `Class::Comment` -- the branch
// bodies are not visible in this excerpt.
182 if doc_style.is_some() {
188 // Consider this as part of a macro invocation if there was a
189 // leading identifier.
190 TokenKind::Bang if self.in_macro => {
// The flag was set by the identifier arm below when an identifier was directly
// followed by `!`.
191 self.in_macro = false;
195 // Assume that '&' or '*' is the reference or dereference operator
196 // or a reference or pointer type. Unless, of course, it looks like
197 // a logical and or a multiplication operator: `&&` or `* `.
198 TokenKind::Star => match lookahead {
199 Some(TokenKind::Whitespace) => Class::Op,
200 _ => Class::RefKeyWord,
202 TokenKind::And => match lookahead {
203 Some(TokenKind::And) => {
// Consume the second `&` so that both characters are emitted as one `&&` operator.
204 let _and = self.tokens.next();
205 sink(Highlight::Token { text: "&&", class: Some(Class::Op) });
208 Some(TokenKind::Eq) => {
// Likewise, fold the following `=` into a single `&=` operator token.
209 let _eq = self.tokens.next();
210 sink(Highlight::Token { text: "&=", class: Some(Class::Op) });
213 Some(TokenKind::Whitespace) => Class::Op,
214 _ => Class::RefKeyWord,
227 | TokenKind::Gt => Class::Op,
229 // Miscellaneous, no highlighting.
233 | TokenKind::OpenParen
234 | TokenKind::CloseParen
235 | TokenKind::OpenBrace
236 | TokenKind::CloseBrace
237 | TokenKind::OpenBracket
241 | TokenKind::Unknown => return no_highlight(sink),
243 TokenKind::Question => Class::QuestionMark,
245 TokenKind::Dollar => match lookahead {
246 Some(TokenKind::Ident) => {
// Remember that the identifier following `$` belongs to the macro non-terminal; the
// identifier arm below consumes and clears this flag.
247 self.in_macro_nonterminal = true;
248 Class::MacroNonTerminal
250 _ => return no_highlight(sink),
253 // This might be the start of an attribute. We're going to want to
254 // continue highlighting it as an attribute until the ending ']' is
255 // seen, so skip out early. Down below we terminate the attribute
256 // span when we see the ']'.
257 TokenKind::Pound => {
259 // Case 1: #![inner_attribute]
260 Some(TokenKind::Bang) => {
// Consume the `!` matched by this arm. NOTE(review): the `unwrap` presumably cannot
// fail because the token was just peeked -- the `match` scrutinee is not visible here.
261 let _not = self.tokens.next().unwrap();
262 if let Some(TokenKind::OpenBracket) = self.peek() {
263 self.in_attribute = true;
264 sink(Highlight::EnterSpan { class: Class::Attribute });
// `#` and the already consumed `!` are emitted as two unclassified tokens.
266 sink(Highlight::Token { text: "#", class: None });
267 sink(Highlight::Token { text: "!", class: None });
270 // Case 2: #[outer_attribute]
271 Some(TokenKind::OpenBracket) => {
272 self.in_attribute = true;
273 sink(Highlight::EnterSpan { class: Class::Attribute });
277 return no_highlight(sink);
279 TokenKind::CloseBracket => {
280 if self.in_attribute {
281 self.in_attribute = false;
// The closing `]` is emitted while the attribute span is still open; the span is
// closed right after it.
282 sink(Highlight::Token { text: "]", class: None });
283 sink(Highlight::ExitSpan);
286 return no_highlight(sink);
288 TokenKind::Literal { kind, .. } => match kind {
290 LiteralKind::Byte { .. }
291 | LiteralKind::Char { .. }
292 | LiteralKind::Str { .. }
293 | LiteralKind::ByteStr { .. }
294 | LiteralKind::RawStr { .. }
295 | LiteralKind::RawByteStr { .. } => Class::String,
297 LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
// An identifier directly followed by `!` starts a macro invocation; this arm must stay
// ahead of the plain identifier arms so that its guard is tried first.
299 TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
// Lets the `Bang` arm above treat the upcoming `!` as part of the invocation.
300 self.in_macro = true;
303 TokenKind::Ident => match text {
304 "ref" | "mut" => Class::RefKeyWord,
305 "self" | "Self" => Class::Self_,
306 "false" | "true" => Class::Bool,
307 "Option" | "Result" => Class::PreludeTy,
308 "Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
309 // Keywords are also included in the identifier set.
310 _ if Symbol::intern(text).is_reserved(|| self.edition) => Class::KeyWord,
311 _ if self.in_macro_nonterminal => {
312 self.in_macro_nonterminal = false;
313 Class::MacroNonTerminal
317 TokenKind::RawIdent => Class::Ident,
318 TokenKind::Lifetime { .. } => Class::Lifetime,
320 // Anything that didn't return above is the simple case where the
321 // class just spans a single token, so we can emit a single classified token.
322 sink(Highlight::Token { text, class: Some(class) });
/// Returns the kind of the next token without consuming it, or `None` when no tokens
/// remain.
325 fn peek(&mut self) -> Option<TokenKind> {
// Only the kind is kept; the peeked token's text is ignored.
326 self.tokens.peek().map(|(toke_kind, _text)| *toke_kind)
330 /// Called when we start processing a span of text that should be highlighted.
331 /// The `Class` argument specifies how it should be highlighted.
332 fn enter_span(out: &mut Buffer, klass: Class) {
// Only the opening tag is written here; `exit_span` writes the matching `</span>`.
333 write!(out, "<span class=\"{}\">", klass.as_html());
336 /// Called at the end of a span of highlighted text.
337 fn exit_span(out: &mut Buffer) {
// Closes the `<span>` element opened by `enter_span`.
338 out.write_str("</span>");
341 /// Called for a span of text. If the text should be highlighted differently
342 /// from the surrounding text, then the `Class` argument will be a value other
345 /// The following sequences of callbacks are equivalent:
347 /// enter_span(Foo), string("text", None), exit_span()
348 /// string("text", Foo)
350 /// The latter can be thought of as a shorthand for the former, which is more
352 fn string<T: Display>(out: &mut Buffer, text: T, klass: Option<Class>) {
// NOTE: no escaping happens in this function; `text` is formatted exactly as its
// `Display` implementation renders it.
// No class: the text is written on its own.
354 None => write!(out, "{}", text),
// With a class: the text is wrapped in a `<span>` whose class comes from `Class::as_html`.
355 Some(klass) => write!(out, "<span class=\"{}\">{}</span>", klass.as_html(), text),