src/libsyntax/util/comments.rs

   1 pub use CommentStyle::*;
   2
   3 use crate::ast;
   4 use crate::sess::ParseSess;
   5
   6 use rustc_span::source_map::SourceMap;
   7 use rustc_span::{BytePos, CharPos, FileName, Pos};
   8
   9 use std::usize;
  10
  11 use log::debug;
  12
  13 #[cfg(test)]
  14 mod tests;
  15
  16 #[derive(Clone, Copy, PartialEq, Debug)]
  17 pub enum CommentStyle {
  18     /// No code on either side of each line of the comment
  19     Isolated,
  20     /// Code exists to the left of the comment
  21     Trailing,
  22     /// Code before /* foo */ and after the comment
  23     Mixed,
  24     /// Just a manual blank line "\n\n", for layout
  25     BlankLine,
  26 }
  27
  28 #[derive(Clone)]
  29 pub struct Comment {
  30     pub style: CommentStyle,
  31     pub lines: Vec<String>,
  32     pub pos: BytePos,
  33 }
  34
  35 pub fn is_line_doc_comment(s: &str) -> bool {
  36     let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/')
  37         || s.starts_with("//!");
  38     debug!("is {:?} a doc comment? {}", s, res);
  39     res
  40 }
  41
  42 pub fn is_block_doc_comment(s: &str) -> bool {
  43     // Prevent `/**/` from being parsed as a doc comment
  44     let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*')
  45         || s.starts_with("/*!"))
  46         && s.len() >= 5;
  47     debug!("is {:?} a doc comment? {}", s, res);
  48     res
  49 }
  50
  51 // FIXME(#64197): Try to privatize this again.
  52 pub fn is_doc_comment(s: &str) -> bool {
  53     (s.starts_with("///") && is_line_doc_comment(s))
  54         || s.starts_with("//!")
  55         || (s.starts_with("/**") && is_block_doc_comment(s))
  56         || s.starts_with("/*!")
  57 }
  58
  59 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
  60     assert!(is_doc_comment(comment));
  61     if comment.starts_with("//!") || comment.starts_with("/*!") {
  62         ast::AttrStyle::Inner
  63     } else {
  64         ast::AttrStyle::Outer
  65     }
  66 }
  67
  68 pub fn strip_doc_comment_decoration(comment: &str) -> String {
  69     /// remove whitespace-only lines from the start/end of lines
  70     fn vertical_trim(lines: Vec<String>) -> Vec<String> {
  71         let mut i = 0;
  72         let mut j = lines.len();
  73         // first line of all-stars should be omitted
  74         if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
  75             i += 1;
  76         }
  77
  78         while i < j && lines[i].trim().is_empty() {
  79             i += 1;
  80         }
  81         // like the first, a last line of all stars should be omitted
  82         if j > i && lines[j - 1].chars().skip(1).all(|c| c == '*') {
  83             j -= 1;
  84         }
  85
  86         while j > i && lines[j - 1].trim().is_empty() {
  87             j -= 1;
  88         }
  89
  90         lines[i..j].to_vec()
  91     }
  92
  93     /// remove a "[ \t]*\*" block from each line, if possible
  94     fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
  95         let mut i = usize::MAX;
  96         let mut can_trim = true;
  97         let mut first = true;
  98
  99         for line in &lines {
 100             for (j, c) in line.chars().enumerate() {
 101                 if j > i || !"* \t".contains(c) {
 102                     can_trim = false;
 103                     break;
 104                 }
 105                 if c == '*' {
 106                     if first {
 107                         i = j;
 108                         first = false;
 109                     } else if i != j {
 110                         can_trim = false;
 111                     }
 112                     break;
 113                 }
 114             }
 115             if i >= line.len() {
 116                 can_trim = false;
 117             }
 118             if !can_trim {
 119                 break;
 120             }
 121         }
 122
 123         if can_trim {
 124             lines.iter().map(|line| (&line[i + 1..line.len()]).to_string()).collect()
 125         } else {
 126             lines
 127         }
 128     }
 129
 130     // one-line comments lose their prefix
 131     const ONELINERS: &[&str] = &["///!", "///", "//!", "//"];
 132
 133     for prefix in ONELINERS {
 134         if comment.starts_with(*prefix) {
 135             return (&comment[prefix.len()..]).to_string();
 136         }
 137     }
 138
 139     if comment.starts_with("/*") {
 140         let lines =
 141             comment[3..comment.len() - 2].lines().map(|s| s.to_string()).collect::<Vec<String>>();
 142
 143         let lines = vertical_trim(lines);
 144         let lines = horizontal_trim(lines);
 145
 146         return lines.join("\n");
 147     }
 148
 149     panic!("not a doc-comment: {}", comment);
 150 }
 151
 152 /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
 153 /// Otherwise returns `Some(k)` where `k` is first char offset after that leading
 154 /// whitespace. Note that `k` may be outside bounds of `s`.
 155 fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
 156     let mut idx = 0;
 157     for (i, ch) in s.char_indices().take(col.to_usize()) {
 158         if !ch.is_whitespace() {
 159             return None;
 160         }
 161         idx = i + ch.len_utf8();
 162     }
 163     Some(idx)
 164 }
 165
 166 fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
 167     let len = s.len();
 168     match all_whitespace(&s, col) {
 169         Some(col) => {
 170             if col < len {
 171                 &s[col..]
 172             } else {
 173                 ""
 174             }
 175         }
 176         None => s,
 177     }
 178 }
 179
 180 fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> {
 181     let mut res: Vec<String> = vec![];
 182     let mut lines = text.lines();
 183     // just push the first line
 184     res.extend(lines.next().map(|it| it.to_string()));
 185     // for other lines, strip common whitespace prefix
 186     for line in lines {
 187         res.push(trim_whitespace_prefix(line, col).to_string())
 188     }
 189     res
 190 }
 191
 192 // it appears this function is called only from pprust... that's
 193 // probably not a good thing.
 194 crate fn gather_comments(sess: &ParseSess, path: FileName, src: String) -> Vec<Comment> {
 195     let cm = SourceMap::new(sess.source_map().path_mapping().clone());
 196     let source_file = cm.new_source_file(path, src);
 197     let text = (*source_file.src.as_ref().unwrap()).clone();
 198
 199     let text: &str = text.as_str();
 200     let start_bpos = source_file.start_pos;
 201     let mut pos = 0;
 202     let mut comments: Vec<Comment> = Vec::new();
 203     let mut code_to_the_left = false;
 204
 205     if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
 206         comments.push(Comment {
 207             style: Isolated,
 208             lines: vec![text[..shebang_len].to_string()],
 209             pos: start_bpos,
 210         });
 211         pos += shebang_len;
 212     }
 213
 214     for token in rustc_lexer::tokenize(&text[pos..]) {
 215         let token_text = &text[pos..pos + token.len];
 216         match token.kind {
 217             rustc_lexer::TokenKind::Whitespace => {
 218                 if let Some(mut idx) = token_text.find('\n') {
 219                     code_to_the_left = false;
 220                     while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
 221                         idx = idx + 1 + next_newline;
 222                         comments.push(Comment {
 223                             style: BlankLine,
 224                             lines: vec![],
 225                             pos: start_bpos + BytePos((pos + idx) as u32),
 226                         });
 227                     }
 228                 }
 229             }
 230             rustc_lexer::TokenKind::BlockComment { terminated: _ } => {
 231                 if !is_block_doc_comment(token_text) {
 232                     let code_to_the_right = match text[pos + token.len..].chars().next() {
 233                         Some('\r') | Some('\n') => false,
 234                         _ => true,
 235                     };
 236                     let style = match (code_to_the_left, code_to_the_right) {
 237                         (true, true) | (false, true) => Mixed,
 238                         (false, false) => Isolated,
 239                         (true, false) => Trailing,
 240                     };
 241
 242                     // Count the number of chars since the start of the line by rescanning.
 243                     let pos_in_file = start_bpos + BytePos(pos as u32);
 244                     let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
 245                     let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
 246                     let col = CharPos(text[line_begin_pos..pos].chars().count());
 247
 248                     let lines = split_block_comment_into_lines(token_text, col);
 249                     comments.push(Comment { style, lines, pos: pos_in_file })
 250                 }
 251             }
 252             rustc_lexer::TokenKind::LineComment => {
 253                 if !is_doc_comment(token_text) {
 254                     comments.push(Comment {
 255                         style: if code_to_the_left { Trailing } else { Isolated },
 256                         lines: vec![token_text.to_string()],
 257                         pos: start_bpos + BytePos(pos as u32),
 258                     })
 259                 }
 260             }
 261             _ => {
 262                 code_to_the_left = true;
 263             }
 264         }
 265         pos += token.len;
 266     }
 267
 268     comments
 269 }