src/libsyntax/util/comments.rs

   1 pub use CommentStyle::*;
   2
   3 use crate::ast;
   4 use crate::source_map::SourceMap;
   5 use crate::sess::ParseSess;
   6
   7 use syntax_pos::{BytePos, CharPos, Pos, FileName};
   8
   9 use std::usize;
  10
  11 use log::debug;
  12
  13 #[cfg(test)]
  14 mod tests;
  15
  16 #[derive(Clone, Copy, PartialEq, Debug)]
  17 pub enum CommentStyle {
  18     /// No code on either side of each line of the comment
  19     Isolated,
  20     /// Code exists to the left of the comment
  21     Trailing,
  22     /// Code before /* foo */ and after the comment
  23     Mixed,
  24     /// Just a manual blank line "\n\n", for layout
  25     BlankLine,
  26 }
  27
  28 #[derive(Clone)]
  29 pub struct Comment {
  30     pub style: CommentStyle,
  31     pub lines: Vec<String>,
  32     pub pos: BytePos,
  33 }
  34
  35 pub fn is_line_doc_comment(s: &str) -> bool {
  36     let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
  37               s.starts_with("//!");
  38     debug!("is {:?} a doc comment? {}", s, res);
  39     res
  40 }
  41
  42 pub fn is_block_doc_comment(s: &str) -> bool {
  43     // Prevent `/**/` from being parsed as a doc comment
  44     let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
  45                s.starts_with("/*!")) && s.len() >= 5;
  46     debug!("is {:?} a doc comment? {}", s, res);
  47     res
  48 }
  49
  50 // FIXME(#64197): Try to privatize this again.
  51 pub fn is_doc_comment(s: &str) -> bool {
  52     (s.starts_with("///") && is_line_doc_comment(s)) || s.starts_with("//!") ||
  53     (s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!")
  54 }
  55
  56 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
  57     assert!(is_doc_comment(comment));
  58     if comment.starts_with("//!") || comment.starts_with("/*!") {
  59         ast::AttrStyle::Inner
  60     } else {
  61         ast::AttrStyle::Outer
  62     }
  63 }
  64
  65 pub fn strip_doc_comment_decoration(comment: &str) -> String {
  66     /// remove whitespace-only lines from the start/end of lines
  67     fn vertical_trim(lines: Vec<String>) -> Vec<String> {
  68         let mut i = 0;
  69         let mut j = lines.len();
  70         // first line of all-stars should be omitted
  71         if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
  72             i += 1;
  73         }
  74
  75         while i < j && lines[i].trim().is_empty() {
  76             i += 1;
  77         }
  78         // like the first, a last line of all stars should be omitted
  79         if j > i &&
  80            lines[j - 1]
  81                .chars()
  82                .skip(1)
  83                .all(|c| c == '*') {
  84             j -= 1;
  85         }
  86
  87         while j > i && lines[j - 1].trim().is_empty() {
  88             j -= 1;
  89         }
  90
  91         lines[i..j].to_vec()
  92     }
  93
  94     /// remove a "[ \t]*\*" block from each line, if possible
  95     fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
  96         let mut i = usize::MAX;
  97         let mut can_trim = true;
  98         let mut first = true;
  99
 100         for line in &lines {
 101             for (j, c) in line.chars().enumerate() {
 102                 if j > i || !"* \t".contains(c) {
 103                     can_trim = false;
 104                     break;
 105                 }
 106                 if c == '*' {
 107                     if first {
 108                         i = j;
 109                         first = false;
 110                     } else if i != j {
 111                         can_trim = false;
 112                     }
 113                     break;
 114                 }
 115             }
 116             if i >= line.len() {
 117                 can_trim = false;
 118             }
 119             if !can_trim {
 120                 break;
 121             }
 122         }
 123
 124         if can_trim {
 125             lines.iter()
 126                  .map(|line| (&line[i + 1..line.len()]).to_string())
 127                  .collect()
 128         } else {
 129             lines
 130         }
 131     }
 132
 133     // one-line comments lose their prefix
 134     const ONELINERS: &[&str] = &["///!", "///", "//!", "//"];
 135
 136     for prefix in ONELINERS {
 137         if comment.starts_with(*prefix) {
 138             return (&comment[prefix.len()..]).to_string();
 139         }
 140     }
 141
 142     if comment.starts_with("/*") {
 143         let lines = comment[3..comment.len() - 2]
 144                         .lines()
 145                         .map(|s| s.to_string())
 146                         .collect::<Vec<String>>();
 147
 148         let lines = vertical_trim(lines);
 149         let lines = horizontal_trim(lines);
 150
 151         return lines.join("\n");
 152     }
 153
 154     panic!("not a doc-comment: {}", comment);
 155 }
 156
 157 /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
 158 /// Otherwise returns `Some(k)` where `k` is first char offset after that leading
 159 /// whitespace. Note that `k` may be outside bounds of `s`.
 160 fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
 161     let mut idx = 0;
 162     for (i, ch) in s.char_indices().take(col.to_usize()) {
 163         if !ch.is_whitespace() {
 164             return None;
 165         }
 166         idx = i + ch.len_utf8();
 167     }
 168     Some(idx)
 169 }
 170
 171 fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
 172     let len = s.len();
 173     match all_whitespace(&s, col) {
 174         Some(col) => if col < len { &s[col..] } else { "" },
 175         None => s,
 176     }
 177 }
 178
 179 fn split_block_comment_into_lines(
 180     text: &str,
 181     col: CharPos,
 182 ) -> Vec<String> {
 183     let mut res: Vec<String> = vec![];
 184     let mut lines = text.lines();
 185     // just push the first line
 186     res.extend(lines.next().map(|it| it.to_string()));
 187     // for other lines, strip common whitespace prefix
 188     for line in lines {
 189         res.push(trim_whitespace_prefix(line, col).to_string())
 190     }
 191     res
 192 }
 193
 194 // it appears this function is called only from pprust... that's
 195 // probably not a good thing.
 196 crate fn gather_comments(sess: &ParseSess, path: FileName, src: String) -> Vec<Comment> {
 197     let cm = SourceMap::new(sess.source_map().path_mapping().clone());
 198     let source_file = cm.new_source_file(path, src);
 199     let text = (*source_file.src.as_ref().unwrap()).clone();
 200
 201     let text: &str = text.as_str();
 202     let start_bpos = source_file.start_pos;
 203     let mut pos = 0;
 204     let mut comments: Vec<Comment> = Vec::new();
 205     let mut code_to_the_left = false;
 206
 207     if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
 208         comments.push(Comment {
 209             style: Isolated,
 210             lines: vec![text[..shebang_len].to_string()],
 211             pos: start_bpos,
 212         });
 213         pos += shebang_len;
 214     }
 215
 216     for token in rustc_lexer::tokenize(&text[pos..]) {
 217         let token_text = &text[pos..pos + token.len];
 218         match token.kind {
 219             rustc_lexer::TokenKind::Whitespace => {
 220                 if let Some(mut idx) = token_text.find('\n') {
 221                     code_to_the_left = false;
 222                     while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
 223                         idx = idx + 1 + next_newline;
 224                         comments.push(Comment {
 225                             style: BlankLine,
 226                             lines: vec![],
 227                             pos: start_bpos + BytePos((pos + idx) as u32),
 228                         });
 229                     }
 230                 }
 231             }
 232             rustc_lexer::TokenKind::BlockComment { terminated: _ } => {
 233                 if !is_block_doc_comment(token_text) {
 234                     let code_to_the_right = match text[pos + token.len..].chars().next() {
 235                         Some('\r') | Some('\n') => false,
 236                         _ => true,
 237                     };
 238                     let style = match (code_to_the_left, code_to_the_right) {
 239                         (true, true) | (false, true) => Mixed,
 240                         (false, false) => Isolated,
 241                         (true, false) => Trailing,
 242                     };
 243
 244                     // Count the number of chars since the start of the line by rescanning.
 245                     let pos_in_file = start_bpos + BytePos(pos as u32);
 246                     let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
 247                     let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
 248                     let col = CharPos(text[line_begin_pos..pos].chars().count());
 249
 250                     let lines = split_block_comment_into_lines(token_text, col);
 251                     comments.push(Comment { style, lines, pos: pos_in_file })
 252                 }
 253             }
 254             rustc_lexer::TokenKind::LineComment => {
 255                 if !is_doc_comment(token_text) {
 256                     comments.push(Comment {
 257                         style: if code_to_the_left { Trailing } else { Isolated },
 258                         lines: vec![token_text.to_string()],
 259                         pos: start_bpos + BytePos(pos as u32),
 260                     })
 261                 }
 262             }
 263             _ => {
 264                 code_to_the_left = true;
 265             }
 266         }
 267         pos += token.len;
 268     }
 269
 270     comments
 271 }