compiler/rustc_ast/src/util/comments.rs

   1 use crate::token::CommentKind;
   2 use rustc_span::source_map::SourceMap;
   3 use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};
   4
   5 #[cfg(test)]
   6 mod tests;
   7
   8 #[derive(Clone, Copy, PartialEq, Debug)]
   9 pub enum CommentStyle {
  10     /// No code on either side of each line of the comment
  11     Isolated,
  12     /// Code exists to the left of the comment
  13     Trailing,
  14     /// Code before /* foo */ and after the comment
  15     Mixed,
  16     /// Just a manual blank line "\n\n", for layout
  17     BlankLine,
  18 }
  19
  20 #[derive(Clone)]
  21 pub struct Comment {
  22     pub style: CommentStyle,
  23     pub lines: Vec<String>,
  24     pub pos: BytePos,
  25 }
  26
  27 /// A fast conservative estimate on whether the string can contain documentation links.
  28 /// A pair of square brackets `[]` must exist in the string, but we only search for the
  29 /// opening bracket because brackets always go in pairs in practice.
  30 #[inline]
  31 pub fn may_have_doc_links(s: &str) -> bool {
  32     s.contains('[')
  33 }
  34
  35 /// Makes a doc string more presentable to users.
  36 /// Used by rustdoc and perhaps other tools, but not by rustc.
  37 pub fn beautify_doc_string(data: Symbol, kind: CommentKind) -> Symbol {
  38     fn get_vertical_trim(lines: &[&str]) -> Option<(usize, usize)> {
  39         let mut i = 0;
  40         let mut j = lines.len();
  41         // first line of all-stars should be omitted
  42         if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
  43             i += 1;
  44         }
  45
  46         // like the first, a last line of all stars should be omitted
  47         if j > i && !lines[j - 1].is_empty() && lines[j - 1].chars().all(|c| c == '*') {
  48             j -= 1;
  49         }
  50
  51         if i != 0 || j != lines.len() { Some((i, j)) } else { None }
  52     }
  53
  54     fn get_horizontal_trim<'a>(lines: &'a [&str], kind: CommentKind) -> Option<String> {
  55         let mut i = usize::MAX;
  56         let mut first = true;
  57
  58         // In case we have doc comments like `/**` or `/*!`, we want to remove stars if they are
  59         // present. However, we first need to strip the empty lines so they don't get in the middle
  60         // when we try to compute the "horizontal trim".
  61         let lines = if kind == CommentKind::Block {
  62             // Whatever happens, we skip the first line.
  63             let mut i = lines
  64                 .get(0)
  65                 .map(|l| if l.trim_start().starts_with('*') { 0 } else { 1 })
  66                 .unwrap_or(0);
  67             let mut j = lines.len();
  68
  69             while i < j && lines[i].trim().is_empty() {
  70                 i += 1;
  71             }
  72             while j > i && lines[j - 1].trim().is_empty() {
  73                 j -= 1;
  74             }
  75             &lines[i..j]
  76         } else {
  77             lines
  78         };
  79
  80         for line in lines {
  81             for (j, c) in line.chars().enumerate() {
  82                 if j > i || !"* \t".contains(c) {
  83                     return None;
  84                 }
  85                 if c == '*' {
  86                     if first {
  87                         i = j;
  88                         first = false;
  89                     } else if i != j {
  90                         return None;
  91                     }
  92                     break;
  93                 }
  94             }
  95             if i >= line.len() {
  96                 return None;
  97             }
  98         }
  99         if lines.is_empty() { None } else { Some(lines[0][..i].into()) }
 100     }
 101
 102     let data_s = data.as_str();
 103     if data_s.contains('\n') {
 104         let mut lines = data_s.lines().collect::<Vec<&str>>();
 105         let mut changes = false;
 106         let lines = if let Some((i, j)) = get_vertical_trim(&lines) {
 107             changes = true;
 108             // remove whitespace-only lines from the start/end of lines
 109             &mut lines[i..j]
 110         } else {
 111             &mut lines
 112         };
 113         if let Some(horizontal) = get_horizontal_trim(&lines, kind) {
 114             changes = true;
 115             // remove a "[ \t]*\*" block from each line, if possible
 116             for line in lines.iter_mut() {
 117                 if let Some(tmp) = line.strip_prefix(&horizontal) {
 118                     *line = tmp;
 119                     if kind == CommentKind::Block
 120                         && (*line == "*" || line.starts_with("* ") || line.starts_with("**"))
 121                     {
 122                         *line = &line[1..];
 123                     }
 124                 }
 125             }
 126         }
 127         if changes {
 128             return Symbol::intern(&lines.join("\n"));
 129         }
 130     }
 131     data
 132 }
 133
 134 /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
 135 /// Otherwise returns `Some(k)` where `k` is first char offset after that leading
 136 /// whitespace. Note that `k` may be outside bounds of `s`.
 137 fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
 138     let mut idx = 0;
 139     for (i, ch) in s.char_indices().take(col.to_usize()) {
 140         if !ch.is_whitespace() {
 141             return None;
 142         }
 143         idx = i + ch.len_utf8();
 144     }
 145     Some(idx)
 146 }
 147
 148 fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
 149     let len = s.len();
 150     match all_whitespace(&s, col) {
 151         Some(col) => {
 152             if col < len {
 153                 &s[col..]
 154             } else {
 155                 ""
 156             }
 157         }
 158         None => s,
 159     }
 160 }
 161
 162 fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> {
 163     let mut res: Vec<String> = vec![];
 164     let mut lines = text.lines();
 165     // just push the first line
 166     res.extend(lines.next().map(|it| it.to_string()));
 167     // for other lines, strip common whitespace prefix
 168     for line in lines {
 169         res.push(trim_whitespace_prefix(line, col).to_string())
 170     }
 171     res
 172 }
 173
 174 // it appears this function is called only from pprust... that's
 175 // probably not a good thing.
 176 pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> {
 177     let sm = SourceMap::new(sm.path_mapping().clone());
 178     let source_file = sm.new_source_file(path, src);
 179     let text = (*source_file.src.as_ref().unwrap()).clone();
 180
 181     let text: &str = text.as_str();
 182     let start_bpos = source_file.start_pos;
 183     let mut pos = 0;
 184     let mut comments: Vec<Comment> = Vec::new();
 185     let mut code_to_the_left = false;
 186
 187     if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
 188         comments.push(Comment {
 189             style: CommentStyle::Isolated,
 190             lines: vec![text[..shebang_len].to_string()],
 191             pos: start_bpos,
 192         });
 193         pos += shebang_len;
 194     }
 195
 196     for token in rustc_lexer::tokenize(&text[pos..]) {
 197         let token_text = &text[pos..pos + token.len as usize];
 198         match token.kind {
 199             rustc_lexer::TokenKind::Whitespace => {
 200                 if let Some(mut idx) = token_text.find('\n') {
 201                     code_to_the_left = false;
 202                     while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
 203                         idx += 1 + next_newline;
 204                         comments.push(Comment {
 205                             style: CommentStyle::BlankLine,
 206                             lines: vec![],
 207                             pos: start_bpos + BytePos((pos + idx) as u32),
 208                         });
 209                     }
 210                 }
 211             }
 212             rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
 213                 if doc_style.is_none() {
 214                     let code_to_the_right = !matches!(
 215                         text[pos + token.len as usize..].chars().next(),
 216                         Some('\r' | '\n')
 217                     );
 218                     let style = match (code_to_the_left, code_to_the_right) {
 219                         (_, true) => CommentStyle::Mixed,
 220                         (false, false) => CommentStyle::Isolated,
 221                         (true, false) => CommentStyle::Trailing,
 222                     };
 223
 224                     // Count the number of chars since the start of the line by rescanning.
 225                     let pos_in_file = start_bpos + BytePos(pos as u32);
 226                     let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
 227                     let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
 228                     let col = CharPos(text[line_begin_pos..pos].chars().count());
 229
 230                     let lines = split_block_comment_into_lines(token_text, col);
 231                     comments.push(Comment { style, lines, pos: pos_in_file })
 232                 }
 233             }
 234             rustc_lexer::TokenKind::LineComment { doc_style } => {
 235                 if doc_style.is_none() {
 236                     comments.push(Comment {
 237                         style: if code_to_the_left {
 238                             CommentStyle::Trailing
 239                         } else {
 240                             CommentStyle::Isolated
 241                         },
 242                         lines: vec![token_text.to_string()],
 243                         pos: start_bpos + BytePos(pos as u32),
 244                     })
 245                 }
 246             }
 247             _ => {
 248                 code_to_the_left = true;
 249             }
 250         }
 251         pos += token.len as usize;
 252     }
 253
 254     comments
 255 }