src/librustdoc/html/highlight.rs

   1 //! Basic syntax highlighting functionality.
   2 //!
   3 //! This module uses libsyntax's lexer to provide token-based highlighting for
   4 //! the HTML documentation generated by rustdoc.
   5 //!
//! Use `render_with_highlighting` to highlight some Rust code.
   7
   8 use html::escape::Escape;
   9
  10 use std::fmt::Display;
  11 use std::io;
  12 use std::io::prelude::*;
  13
  14 use syntax::source_map::{SourceMap, FilePathMapping};
  15 use syntax::parse::lexer::{self, TokenAndSpan};
  16 use syntax::parse::token;
  17 use syntax::parse;
  18 use syntax_pos::{Span, FileName};
  19
  20 /// Highlights `src`, returning the HTML output.
  21 pub fn render_with_highlighting(
  22     src: &str,
  23     class: Option<&str>,
  24     extension: Option<&str>,
  25     tooltip: Option<(&str, &str)>,
  26 ) -> String {
  27     debug!("highlighting: ================\n{}\n==============", src);
  28     let mut out = Vec::new();
  29     if let Some((tooltip, class)) = tooltip {
  30         write!(out, "<div class='information'><div class='tooltip {}'>ⓘ<span \
  31                      class='tooltiptext'>{}</span></div></div>",
  32                class, tooltip).unwrap();
  33     }
  34
  35     let sess = parse::ParseSess::new(FilePathMapping::empty());
  36     let fm = sess.source_map().new_source_file(
  37         FileName::Custom(String::from("rustdoc-highlighting")),
  38         src.to_owned(),
  39     );
  40     let highlight_result =
  41         lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| {
  42             let mut classifier = Classifier::new(lexer, sess.source_map());
  43
  44             let mut highlighted_source = vec![];
  45             if classifier.write_source(&mut highlighted_source).is_err() {
  46                 Err(classifier.lexer.buffer_fatal_errors())
  47             } else {
  48                 Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
  49             }
  50         });
  51
  52     match highlight_result {
  53         Ok(highlighted_source) => {
  54             write_header(class, &mut out).unwrap();
  55             write!(out, "{}", highlighted_source).unwrap();
  56             if let Some(extension) = extension {
  57                 write!(out, "{}", extension).unwrap();
  58             }
  59             write_footer(&mut out).unwrap();
  60         }
  61         Err(errors) => {
  62             // If errors are encountered while trying to highlight, cancel the errors and just emit
  63             // the unhighlighted source. The errors will have already been reported in the
  64             // `check-code-block-syntax` pass.
  65             for mut error in errors {
  66                 error.cancel();
  67             }
  68
  69             write!(out, "<pre><code>{}</code></pre>", src).unwrap();
  70         }
  71     }
  72
  73     String::from_utf8_lossy(&out[..]).into_owned()
  74 }
  75
/// Processes a program (nested in the internal `lexer`), classifying strings of
/// text by highlighting category (`Class`). Calls out to a `Writer` to write
/// each span of text in sequence.
struct Classifier<'a> {
    // Source of the tokens being classified.
    lexer: lexer::StringReader<'a>,
    // Used to recover the original text of each token's span, so the output
    // reproduces the source rather than a re-rendering of the tokens.
    source_map: &'a SourceMap,

    // State of the classifier.
    // Set once `#[` or `#![` has been seen; cleared at the next `]`.
    in_attribute: bool,
    // Set when an identifier is directly followed by `!`, so that the `!` is
    // highlighted as part of the macro invocation.
    in_macro: bool,
    // Set after a `$` that is directly followed by an identifier, so that the
    // identifier can be highlighted as a macro nonterminal.
    in_macro_nonterminal: bool,
}
  88
/// How a span of text is classified. Mostly corresponds to token kinds.
///
/// Each variant is translated to a CSS class name by `Class::rustdoc_class`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Class {
    // No highlighting; the text is written without a wrapping `<span>`.
    None,
    Comment,
    DocComment,
    // Everything from the `#` of an attribute up to its closing `]`.
    Attribute,
    KeyWord,
    // Keywords that do pointer/reference stuff.
    RefKeyWord,
    // The identifiers `self` and `Self`.
    Self_,
    Op,
    // A macro name together with the `!` that follows it.
    Macro,
    // A `$` and the identifier following it.
    MacroNonTerminal,
    // String, byte-string, char and byte literals.
    String,
    // Integer and float literals.
    Number,
    // The literals `true` and `false`.
    Bool,
    Ident,
    Lifetime,
    // `Option` and `Result`.
    PreludeTy,
    // `Some`, `None`, `Ok` and `Err`.
    PreludeVal,
    // The `?` token.
    QuestionMark,
}
 112
/// Trait that controls writing the output of syntax highlighting. Users should
/// implement this trait to customize writing output.
///
/// The classifier will call into the `Writer` implementation as it finds spans
/// of text to highlight. Exactly how that text should be highlighted is up to
/// the implementation.
///
/// Every method returns an `io::Result` so that implementations backed by an
/// I/O sink can report write failures to the classifier.
trait Writer {
    /// Called when we start processing a span of text that should be highlighted.
    /// The `Class` argument specifies how it should be highlighted.
    ///
    /// Text passed to `string` until the matching `exit_span` belongs to this span.
    fn enter_span(&mut self, _: Class) -> io::Result<()>;

    /// Called at the end of a span of highlighted text.
    fn exit_span(&mut self) -> io::Result<()>;

    /// Called for a span of text. If the text should be highlighted differently from the
    /// surrounding text, then the `Class` argument will be a value other than `None`.
    ///
    /// The following sequences of callbacks are equivalent:
    /// ```plain
    ///     enter_span(Foo), string("text", None), exit_span()
    ///     string("text", Foo)
    /// ```
    /// The latter can be thought of as a shorthand for the former, which is
    /// more flexible.
    fn string<T: Display>(&mut self,
                          text: T,
                          klass: Class)
                          -> io::Result<()>;
}
 142
 143 // Implement `Writer` for anthing that can be written to, this just implements
 144 // the default rustdoc behaviour.
 145 impl<U: Write> Writer for U {
 146     fn string<T: Display>(&mut self,
 147                           text: T,
 148                           klass: Class)
 149                           -> io::Result<()> {
 150         match klass {
 151             Class::None => write!(self, "{}", text),
 152             klass => write!(self, "<span class=\"{}\">{}</span>", klass.rustdoc_class(), text),
 153         }
 154     }
 155
 156     fn enter_span(&mut self, klass: Class) -> io::Result<()> {
 157         write!(self, "<span class=\"{}\">", klass.rustdoc_class())
 158     }
 159
 160     fn exit_span(&mut self) -> io::Result<()> {
 161         write!(self, "</span>")
 162     }
 163 }
 164
/// Reasons why highlighting a piece of source can fail.
enum HighlightError {
    /// The lexer failed to produce the next token.
    LexError,
    /// Writing the highlighted output failed.
    IoError(io::Error),
}
 169
// Allows `?` to be used on `io::Result` values (such as those returned by
// `Writer` methods) inside functions that return `HighlightError`.
impl From<io::Error> for HighlightError {
    /// Wraps `err` in `HighlightError::IoError`.
    fn from(err: io::Error) -> Self {
        HighlightError::IoError(err)
    }
}
 175
 176 impl<'a> Classifier<'a> {
 177     fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
 178         Classifier {
 179             lexer,
 180             source_map,
 181             in_attribute: false,
 182             in_macro: false,
 183             in_macro_nonterminal: false,
 184         }
 185     }
 186
 187     /// Gets the next token out of the lexer.
 188     fn try_next_token(&mut self) -> Result<TokenAndSpan, HighlightError> {
 189         match self.lexer.try_next_token() {
 190             Ok(tas) => Ok(tas),
 191             Err(_) => Err(HighlightError::LexError),
 192         }
 193     }
 194
 195     /// Exhausts the `lexer` writing the output into `out`.
 196     ///
 197     /// The general structure for this method is to iterate over each token,
 198     /// possibly giving it an HTML span with a class specifying what flavor of token
 199     /// is used. All source code emission is done as slices from the source map,
 200     /// not from the tokens themselves, in order to stay true to the original
 201     /// source.
 202     fn write_source<W: Writer>(&mut self,
 203                                    out: &mut W)
 204                                    -> Result<(), HighlightError> {
 205         loop {
 206             let next = self.try_next_token()?;
 207             if next.tok == token::Eof {
 208                 break;
 209             }
 210
 211             self.write_token(out, next)?;
 212         }
 213
 214         Ok(())
 215     }
 216
 217     // Handles an individual token from the lexer.
 218     fn write_token<W: Writer>(&mut self,
 219                               out: &mut W,
 220                               tas: TokenAndSpan)
 221                               -> Result<(), HighlightError> {
 222         let klass = match tas.tok {
 223             token::Shebang(s) => {
 224                 out.string(Escape(&s.as_str()), Class::None)?;
 225                 return Ok(());
 226             },
 227
 228             token::Whitespace => Class::None,
 229             token::Comment => Class::Comment,
 230             token::DocComment(..) => Class::DocComment,
 231
 232             // If this '&' or '*' token is followed by a non-whitespace token, assume that it's the
 233             // reference or dereference operator or a reference or pointer type, instead of the
 234             // bit-and or multiplication operator.
 235             token::BinOp(token::And) | token::BinOp(token::Star)
 236                 if self.lexer.peek().tok != token::Whitespace => Class::RefKeyWord,
 237
 238             // Consider this as part of a macro invocation if there was a
 239             // leading identifier.
 240             token::Not if self.in_macro => {
 241                 self.in_macro = false;
 242                 Class::Macro
 243             }
 244
 245             // Operators.
 246             token::Eq | token::Lt | token::Le | token::EqEq | token::Ne | token::Ge | token::Gt |
 247                 token::AndAnd | token::OrOr | token::Not | token::BinOp(..) | token::RArrow |
 248                 token::BinOpEq(..) | token::FatArrow => Class::Op,
 249
 250             // Miscellaneous, no highlighting.
 251             token::Dot | token::DotDot | token::DotDotDot | token::DotDotEq | token::Comma |
 252                 token::Semi | token::Colon | token::ModSep | token::LArrow | token::OpenDelim(_) |
 253                 token::CloseDelim(token::Brace) | token::CloseDelim(token::Paren) |
 254                 token::CloseDelim(token::NoDelim) => Class::None,
 255
 256             token::Question => Class::QuestionMark,
 257
 258             token::Dollar => {
 259                 if self.lexer.peek().tok.is_ident() {
 260                     self.in_macro_nonterminal = true;
 261                     Class::MacroNonTerminal
 262                 } else {
 263                     Class::None
 264                 }
 265             }
 266
 267             // This might be the start of an attribute. We're going to want to
 268             // continue highlighting it as an attribute until the ending ']' is
 269             // seen, so skip out early. Down below we terminate the attribute
 270             // span when we see the ']'.
 271             token::Pound => {
 272                 // We can't be sure that our # begins an attribute (it could
 273                 // just be appearing in a macro) until we read either `#![` or
 274                 // `#[` from the input stream.
 275                 //
 276                 // We don't want to start highlighting as an attribute until
 277                 // we're confident there is going to be a ] coming up, as
 278                 // otherwise # tokens in macros highlight the rest of the input
 279                 // as an attribute.
 280
 281                 // Case 1: #![inner_attribute]
 282                 if self.lexer.peek().tok == token::Not {
 283                     self.try_next_token()?; // NOTE: consumes `!` token!
 284                     if self.lexer.peek().tok == token::OpenDelim(token::Bracket) {
 285                         self.in_attribute = true;
 286                         out.enter_span(Class::Attribute)?;
 287                     }
 288                     out.string("#", Class::None)?;
 289                     out.string("!", Class::None)?;
 290                     return Ok(());
 291                 }
 292
 293                 // Case 2: #[outer_attribute]
 294                 if self.lexer.peek().tok == token::OpenDelim(token::Bracket) {
 295                     self.in_attribute = true;
 296                     out.enter_span(Class::Attribute)?;
 297                 }
 298                 out.string("#", Class::None)?;
 299                 return Ok(());
 300             }
 301             token::CloseDelim(token::Bracket) => {
 302                 if self.in_attribute {
 303                     self.in_attribute = false;
 304                     out.string("]", Class::None)?;
 305                     out.exit_span()?;
 306                     return Ok(());
 307                 } else {
 308                     Class::None
 309                 }
 310             }
 311
 312             token::Literal(lit, _suf) => {
 313                 match lit {
 314                     // Text literals.
 315                     token::Byte(..) | token::Char(..) | token::Err(..) |
 316                         token::ByteStr(..) | token::ByteStrRaw(..) |
 317                         token::Str_(..) | token::StrRaw(..) => Class::String,
 318
 319                     // Number literals.
 320                     token::Integer(..) | token::Float(..) => Class::Number,
 321                 }
 322             }
 323
 324             // Keywords are also included in the identifier set.
 325             token::Ident(ident, is_raw) => {
 326                 match &*ident.as_str() {
 327                     "ref" | "mut" if !is_raw => Class::RefKeyWord,
 328
 329                     "self" | "Self" => Class::Self_,
 330                     "false" | "true" if !is_raw => Class::Bool,
 331
 332                     "Option" | "Result" => Class::PreludeTy,
 333                     "Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
 334
 335                     "$crate" => Class::KeyWord,
 336                     _ if tas.tok.is_reserved_ident() => Class::KeyWord,
 337
 338                     _ => {
 339                         if self.in_macro_nonterminal {
 340                             self.in_macro_nonterminal = false;
 341                             Class::MacroNonTerminal
 342                         } else if self.lexer.peek().tok == token::Not {
 343                             self.in_macro = true;
 344                             Class::Macro
 345                         } else {
 346                             Class::Ident
 347                         }
 348                     }
 349                 }
 350             }
 351
 352             token::Lifetime(..) => Class::Lifetime,
 353
 354             token::Eof | token::Interpolated(..) |
 355             token::Tilde | token::At| token::SingleQuote => Class::None,
 356         };
 357
 358         // Anything that didn't return above is the simple case where we the
 359         // class just spans a single token, so we can use the `string` method.
 360         out.string(Escape(&self.snip(tas.sp)), klass)?;
 361
 362         Ok(())
 363     }
 364
 365     // Helper function to get a snippet from the source_map.
 366     fn snip(&self, sp: Span) -> String {
 367         self.source_map.span_to_snippet(sp).unwrap()
 368     }
 369 }
 370
 371 impl Class {
 372     /// Returns the css class expected by rustdoc for each `Class`.
 373     fn rustdoc_class(self) -> &'static str {
 374         match self {
 375             Class::None => "",
 376             Class::Comment => "comment",
 377             Class::DocComment => "doccomment",
 378             Class::Attribute => "attribute",
 379             Class::KeyWord => "kw",
 380             Class::RefKeyWord => "kw-2",
 381             Class::Self_ => "self",
 382             Class::Op => "op",
 383             Class::Macro => "macro",
 384             Class::MacroNonTerminal => "macro-nonterminal",
 385             Class::String => "string",
 386             Class::Number => "number",
 387             Class::Bool => "bool-val",
 388             Class::Ident => "ident",
 389             Class::Lifetime => "lifetime",
 390             Class::PreludeTy => "prelude-ty",
 391             Class::PreludeVal => "prelude-val",
 392             Class::QuestionMark => "question-mark"
 393         }
 394     }
 395 }
 396
 397 fn write_header(class: Option<&str>, out: &mut dyn Write) -> io::Result<()> {
 398     write!(out, "<div class=\"example-wrap\"><pre class=\"rust {}\">\n", class.unwrap_or(""))
 399 }
 400
 401 fn write_footer(out: &mut dyn Write) -> io::Result<()> {
 402     write!(out, "</pre></div>\n")
 403 }