src/libsyntax/parse/lexer/comments.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 pub use self::CommentStyle::*;
  12
  13 use ast;
  14 use codemap::CodeMap;
  15 use syntax_pos::{BytePos, CharPos, Pos};
  16 use parse::lexer::{is_block_doc_comment, is_pattern_whitespace};
  17 use parse::lexer::{self, ParseSess, StringReader, TokenAndSpan};
  18 use print::pprust;
  19 use str::char_at;
  20
  21 use std::io::Read;
  22 use std::usize;
  23
  24 #[derive(Clone, Copy, PartialEq, Debug)]
  25 pub enum CommentStyle {
  26     /// No code on either side of each line of the comment
  27     Isolated,
  28     /// Code exists to the left of the comment
  29     Trailing,
  30     /// Code before /* foo */ and after the comment
  31     Mixed,
  32     /// Just a manual blank line "\n\n", for layout
  33     BlankLine,
  34 }
  35
  36 #[derive(Clone)]
  37 pub struct Comment {
  38     pub style: CommentStyle,
  39     pub lines: Vec<String>,
  40     pub pos: BytePos,
  41 }
  42
  43 pub fn is_doc_comment(s: &str) -> bool {
  44     (s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") ||
  45     (s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!")
  46 }
  47
  48 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
  49     assert!(is_doc_comment(comment));
  50     if comment.starts_with("//!") || comment.starts_with("/*!") {
  51         ast::AttrStyle::Inner
  52     } else {
  53         ast::AttrStyle::Outer
  54     }
  55 }
  56
  57 pub fn strip_doc_comment_decoration(comment: &str) -> String {
  58     /// remove whitespace-only lines from the start/end of lines
  59     fn vertical_trim(lines: Vec<String>) -> Vec<String> {
  60         let mut i = 0;
  61         let mut j = lines.len();
  62         // first line of all-stars should be omitted
  63         if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
  64             i += 1;
  65         }
  66         while i < j && lines[i].trim().is_empty() {
  67             i += 1;
  68         }
  69         // like the first, a last line of all stars should be omitted
  70         if j > i &&
  71            lines[j - 1]
  72                .chars()
  73                .skip(1)
  74                .all(|c| c == '*') {
  75             j -= 1;
  76         }
  77         while j > i && lines[j - 1].trim().is_empty() {
  78             j -= 1;
  79         }
  80         lines[i..j].to_vec()
  81     }
  82
  83     /// remove a "[ \t]*\*" block from each line, if possible
  84     fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
  85         let mut i = usize::MAX;
  86         let mut can_trim = true;
  87         let mut first = true;
  88         for line in &lines {
  89             for (j, c) in line.chars().enumerate() {
  90                 if j > i || !"* \t".contains(c) {
  91                     can_trim = false;
  92                     break;
  93                 }
  94                 if c == '*' {
  95                     if first {
  96                         i = j;
  97                         first = false;
  98                     } else if i != j {
  99                         can_trim = false;
 100                     }
 101                     break;
 102                 }
 103             }
 104             if i > line.len() {
 105                 can_trim = false;
 106             }
 107             if !can_trim {
 108                 break;
 109             }
 110         }
 111
 112         if can_trim {
 113             lines.iter()
 114                  .map(|line| (&line[i + 1..line.len()]).to_string())
 115                  .collect()
 116         } else {
 117             lines
 118         }
 119     }
 120
 121     // one-line comments lose their prefix
 122     const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
 123     for prefix in ONELINERS {
 124         if comment.starts_with(*prefix) {
 125             return (&comment[prefix.len()..]).to_string();
 126         }
 127     }
 128
 129     if comment.starts_with("/*") {
 130         let lines = comment[3..comment.len() - 2]
 131                         .lines()
 132                         .map(|s| s.to_string())
 133                         .collect::<Vec<String>>();
 134
 135         let lines = vertical_trim(lines);
 136         let lines = horizontal_trim(lines);
 137
 138         return lines.join("\n");
 139     }
 140
 141     panic!("not a doc-comment: {}", comment);
 142 }
 143
 144 fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
 145     debug!(">>> blank-line comment");
 146     comments.push(Comment {
 147         style: BlankLine,
 148         lines: Vec::new(),
 149         pos: rdr.pos,
 150     });
 151 }
 152
 153 fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader, comments: &mut Vec<Comment>) {
 154     while is_pattern_whitespace(rdr.ch) && !rdr.is_eof() {
 155         if rdr.ch_is('\n') {
 156             push_blank_line_comment(rdr, &mut *comments);
 157         }
 158         rdr.bump();
 159     }
 160 }
 161
 162 fn read_shebang_comment(rdr: &mut StringReader,
 163                         code_to_the_left: bool,
 164                         comments: &mut Vec<Comment>) {
 165     debug!(">>> shebang comment");
 166     let p = rdr.pos;
 167     debug!("<<< shebang comment");
 168     comments.push(Comment {
 169         style: if code_to_the_left { Trailing } else { Isolated },
 170         lines: vec![rdr.read_one_line_comment()],
 171         pos: p,
 172     });
 173 }
 174
 175 fn read_line_comments(rdr: &mut StringReader,
 176                       code_to_the_left: bool,
 177                       comments: &mut Vec<Comment>) {
 178     debug!(">>> line comments");
 179     let p = rdr.pos;
 180     let mut lines: Vec<String> = Vec::new();
 181     while rdr.ch_is('/') && rdr.nextch_is('/') {
 182         let line = rdr.read_one_line_comment();
 183         debug!("{}", line);
 184         // Doc comments are not put in comments.
 185         if is_doc_comment(&line[..]) {
 186             break;
 187         }
 188         lines.push(line);
 189         rdr.consume_non_eol_whitespace();
 190     }
 191     debug!("<<< line comments");
 192     if !lines.is_empty() {
 193         comments.push(Comment {
 194             style: if code_to_the_left { Trailing } else { Isolated },
 195             lines: lines,
 196             pos: p,
 197         });
 198     }
 199 }
 200
 201 /// Returns None if the first col chars of s contain a non-whitespace char.
 202 /// Otherwise returns Some(k) where k is first char offset after that leading
 203 /// whitespace.  Note k may be outside bounds of s.
 204 fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
 205     let len = s.len();
 206     let mut col = col.to_usize();
 207     let mut cursor: usize = 0;
 208     while col > 0 && cursor < len {
 209         let ch = char_at(s, cursor);
 210         if !ch.is_whitespace() {
 211             return None;
 212         }
 213         cursor += ch.len_utf8();
 214         col -= 1;
 215     }
 216     return Some(cursor);
 217 }
 218
 219 fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String>, s: String, col: CharPos) {
 220     let len = s.len();
 221     let s1 = match all_whitespace(&s[..], col) {
 222         Some(col) => {
 223             if col < len {
 224                 (&s[col..len]).to_string()
 225             } else {
 226                 "".to_string()
 227             }
 228         }
 229         None => s,
 230     };
 231     debug!("pushing line: {}", s1);
 232     lines.push(s1);
 233 }
 234
 235 fn read_block_comment(rdr: &mut StringReader,
 236                       code_to_the_left: bool,
 237                       comments: &mut Vec<Comment>) {
 238     debug!(">>> block comment");
 239     let p = rdr.pos;
 240     let mut lines: Vec<String> = Vec::new();
 241     let col = rdr.col;
 242     rdr.bump();
 243     rdr.bump();
 244
 245     let mut curr_line = String::from("/*");
 246
 247     // doc-comments are not really comments, they are attributes
 248     if (rdr.ch_is('*') && !rdr.nextch_is('*')) || rdr.ch_is('!') {
 249         while !(rdr.ch_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
 250             curr_line.push(rdr.ch.unwrap());
 251             rdr.bump();
 252         }
 253         if !rdr.is_eof() {
 254             curr_line.push_str("*/");
 255             rdr.bump();
 256             rdr.bump();
 257         }
 258         if is_block_doc_comment(&curr_line[..]) {
 259             return;
 260         }
 261         assert!(!curr_line.contains('\n'));
 262         lines.push(curr_line);
 263     } else {
 264         let mut level: isize = 1;
 265         while level > 0 {
 266             debug!("=== block comment level {}", level);
 267             if rdr.is_eof() {
 268                 panic!(rdr.fatal("unterminated block comment"));
 269             }
 270             if rdr.ch_is('\n') {
 271                 trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
 272                 curr_line = String::new();
 273                 rdr.bump();
 274             } else {
 275                 curr_line.push(rdr.ch.unwrap());
 276                 if rdr.ch_is('/') && rdr.nextch_is('*') {
 277                     rdr.bump();
 278                     rdr.bump();
 279                     curr_line.push('*');
 280                     level += 1;
 281                 } else {
 282                     if rdr.ch_is('*') && rdr.nextch_is('/') {
 283                         rdr.bump();
 284                         rdr.bump();
 285                         curr_line.push('/');
 286                         level -= 1;
 287                     } else {
 288                         rdr.bump();
 289                     }
 290                 }
 291             }
 292         }
 293         if !curr_line.is_empty() {
 294             trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
 295         }
 296     }
 297
 298     let mut style = if code_to_the_left {
 299         Trailing
 300     } else {
 301         Isolated
 302     };
 303     rdr.consume_non_eol_whitespace();
 304     if !rdr.is_eof() && !rdr.ch_is('\n') && lines.len() == 1 {
 305         style = Mixed;
 306     }
 307     debug!("<<< block comment");
 308     comments.push(Comment {
 309         style: style,
 310         lines: lines,
 311         pos: p,
 312     });
 313 }
 314
 315
 316 fn consume_comment(rdr: &mut StringReader,
 317                    comments: &mut Vec<Comment>,
 318                    code_to_the_left: &mut bool,
 319                    anything_to_the_left: &mut bool) {
 320     debug!(">>> consume comment");
 321     if rdr.ch_is('/') && rdr.nextch_is('/') {
 322         read_line_comments(rdr, *code_to_the_left, comments);
 323         *code_to_the_left = false;
 324         *anything_to_the_left = false;
 325     } else if rdr.ch_is('/') && rdr.nextch_is('*') {
 326         read_block_comment(rdr, *code_to_the_left, comments);
 327         *anything_to_the_left = true;
 328     } else if rdr.ch_is('#') && rdr.nextch_is('!') {
 329         read_shebang_comment(rdr, *code_to_the_left, comments);
 330         *code_to_the_left = false;
 331         *anything_to_the_left = false;
 332     } else {
 333         panic!();
 334     }
 335     debug!("<<< consume comment");
 336 }
 337
 338 #[derive(Clone)]
 339 pub struct Literal {
 340     pub lit: String,
 341     pub pos: BytePos,
 342 }
 343
 344 // it appears this function is called only from pprust... that's
 345 // probably not a good thing.
 346 pub fn gather_comments_and_literals(sess: &ParseSess, path: String, srdr: &mut Read)
 347                                     -> (Vec<Comment>, Vec<Literal>) {
 348     let mut src = Vec::new();
 349     srdr.read_to_end(&mut src).unwrap();
 350     let src = String::from_utf8(src).unwrap();
 351     let cm = CodeMap::new(sess.codemap().path_mapping().clone());
 352     let filemap = cm.new_filemap(path, src);
 353     let mut rdr = lexer::StringReader::new_raw(sess, filemap);
 354
 355     let mut comments: Vec<Comment> = Vec::new();
 356     let mut literals: Vec<Literal> = Vec::new();
 357     let mut code_to_the_left = false; // Only code
 358     let mut anything_to_the_left = false; // Code or comments
 359     while !rdr.is_eof() {
 360         loop {
 361             // Eat all the whitespace and count blank lines.
 362             rdr.consume_non_eol_whitespace();
 363             if rdr.ch_is('\n') {
 364                 if anything_to_the_left {
 365                     rdr.bump(); // The line is not blank, do not count.
 366                 }
 367                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
 368                 code_to_the_left = false;
 369                 anything_to_the_left = false;
 370             }
 371             // Eat one comment group
 372             if rdr.peeking_at_comment() {
 373                 consume_comment(&mut rdr, &mut comments,
 374                                 &mut code_to_the_left, &mut anything_to_the_left);
 375             } else {
 376                 break
 377             }
 378         }
 379
 380         let bstart = rdr.pos;
 381         rdr.next_token();
 382         // discard, and look ahead; we're working with internal state
 383         let TokenAndSpan { tok, sp } = rdr.peek();
 384         if tok.is_lit() {
 385             rdr.with_str_from(bstart, |s| {
 386                 debug!("tok lit: {}", s);
 387                 literals.push(Literal {
 388                     lit: s.to_string(),
 389                     pos: sp.lo,
 390                 });
 391             })
 392         } else {
 393             debug!("tok: {}", pprust::token_to_string(&tok));
 394         }
 395         code_to_the_left = true;
 396         anything_to_the_left = true;
 397     }
 398
 399     (comments, literals)
 400 }
 401
 402 #[cfg(test)]
 403 mod tests {
 404     use super::*;
 405
 406     #[test]
 407     fn test_block_doc_comment_1() {
 408         let comment = "/**\n * Test \n **  Test\n *   Test\n*/";
 409         let stripped = strip_doc_comment_decoration(comment);
 410         assert_eq!(stripped, " Test \n*  Test\n   Test");
 411     }
 412
 413     #[test]
 414     fn test_block_doc_comment_2() {
 415         let comment = "/**\n * Test\n *  Test\n*/";
 416         let stripped = strip_doc_comment_decoration(comment);
 417         assert_eq!(stripped, " Test\n  Test");
 418     }
 419
 420     #[test]
 421     fn test_block_doc_comment_3() {
 422         let comment = "/**\n let a: *i32;\n *a = 5;\n*/";
 423         let stripped = strip_doc_comment_decoration(comment);
 424         assert_eq!(stripped, " let a: *i32;\n *a = 5;");
 425     }
 426
 427     #[test]
 428     fn test_block_doc_comment_4() {
 429         let comment = "/*******************\n test\n *********************/";
 430         let stripped = strip_doc_comment_decoration(comment);
 431         assert_eq!(stripped, " test");
 432     }
 433
 434     #[test]
 435     fn test_line_doc_comment() {
 436         let stripped = strip_doc_comment_decoration("/// test");
 437         assert_eq!(stripped, " test");
 438         let stripped = strip_doc_comment_decoration("///! test");
 439         assert_eq!(stripped, " test");
 440         let stripped = strip_doc_comment_decoration("// test");
 441         assert_eq!(stripped, " test");
 442         let stripped = strip_doc_comment_decoration("// test");
 443         assert_eq!(stripped, " test");
 444         let stripped = strip_doc_comment_decoration("///test");
 445         assert_eq!(stripped, "test");
 446         let stripped = strip_doc_comment_decoration("///!test");
 447         assert_eq!(stripped, "test");
 448         let stripped = strip_doc_comment_decoration("//test");
 449         assert_eq!(stripped, "test");
 450     }
 451 }