src/libsyntax/parse/comments.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 use ast;
  12 use codemap::{BytePos, CharPos, CodeMap, Pos};
  13 use diagnostic;
  14 use parse::lexer::{is_whitespace, with_str_from, Reader};
  15 use parse::lexer::{StringReader, bump, is_eof, nextch_is, TokenAndSpan};
  16 use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
  17 use parse::lexer;
  18 use parse::token;
  19
  20 use std::io;
  21 use std::str;
  22 use std::strbuf::StrBuf;
  23 use std::uint;
  24
/// Where a gathered comment sits relative to the code around it.
#[deriving(Clone, Eq)]
pub enum CommentStyle {
    /// No code on either side of each line of the comment
    Isolated,
    /// Code exists to the left of the comment
    Trailing,
    /// Code before /* foo */ and after the comment
    Mixed,
    /// Just a manual blank line "\n\n", for layout
    BlankLine,
}
  32
/// One comment (or blank-line marker) collected while scanning source text.
#[deriving(Clone)]
pub struct Comment {
    /// Placement of the comment relative to surrounding code.
    pub style: CommentStyle,
    /// Comment text, one entry per line, delimiters included; empty for
    /// `BlankLine` entries.
    pub lines: Vec<~str>,
    /// The reader's `last_pos` captured when the comment was first seen.
    pub pos: BytePos,
}
  39
  40 pub fn is_doc_comment(s: &str) -> bool {
  41     (s.starts_with("///") && !is_line_non_doc_comment(s)) ||
  42     s.starts_with("//!") ||
  43     (s.starts_with("/**") && !is_block_non_doc_comment(s)) ||
  44     s.starts_with("/*!")
  45 }
  46
  47 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
  48     assert!(is_doc_comment(comment));
  49     if comment.starts_with("//!") || comment.starts_with("/*!") {
  50         ast::AttrInner
  51     } else {
  52         ast::AttrOuter
  53     }
  54 }
  55
  56 pub fn strip_doc_comment_decoration(comment: &str) -> ~str {
  57     /// remove whitespace-only lines from the start/end of lines
  58     fn vertical_trim(lines: Vec<~str> ) -> Vec<~str> {
  59         let mut i = 0u;
  60         let mut j = lines.len();
  61         // first line of all-stars should be omitted
  62         if lines.len() > 0 && lines.get(0).chars().all(|c| c == '*') {
  63             i += 1;
  64         }
  65         while i < j && lines.get(i).trim().is_empty() {
  66             i += 1;
  67         }
  68         // like the first, a last line of all stars should be omitted
  69         if j > i && lines.get(j - 1).chars().skip(1).all(|c| c == '*') {
  70             j -= 1;
  71         }
  72         while j > i && lines.get(j - 1).trim().is_empty() {
  73             j -= 1;
  74         }
  75         return lines.slice(i, j).iter().map(|x| (*x).clone()).collect();
  76     }
  77
  78     /// remove a "[ \t]*\*" block from each line, if possible
  79     fn horizontal_trim(lines: Vec<~str> ) -> Vec<~str> {
  80         let mut i = uint::MAX;
  81         let mut can_trim = true;
  82         let mut first = true;
  83         for line in lines.iter() {
  84             for (j, c) in line.chars().enumerate() {
  85                 if j > i || !"* \t".contains_char(c) {
  86                     can_trim = false;
  87                     break;
  88                 }
  89                 if c == '*' {
  90                     if first {
  91                         i = j;
  92                         first = false;
  93                     } else if i != j {
  94                         can_trim = false;
  95                     }
  96                     break;
  97                 }
  98             }
  99             if i > line.len() {
 100                 can_trim = false;
 101             }
 102             if !can_trim {
 103                 break;
 104             }
 105         }
 106
 107         if can_trim {
 108             lines.iter().map(|line| line.slice(i + 1, line.len()).to_owned()).collect()
 109         } else {
 110             lines
 111         }
 112     }
 113
 114     // one-line comments lose their prefix
 115     static ONLINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
 116     for prefix in ONLINERS.iter() {
 117         if comment.starts_with(*prefix) {
 118             return comment.slice_from(prefix.len()).to_owned();
 119         }
 120     }
 121
 122     if comment.starts_with("/*") {
 123         let lines = comment.slice(3u, comment.len() - 2u)
 124             .lines_any()
 125             .map(|s| s.to_owned())
 126             .collect::<Vec<~str> >();
 127
 128         let lines = vertical_trim(lines);
 129         let lines = horizontal_trim(lines);
 130
 131         return lines.connect("\n");
 132     }
 133
 134     fail!("not a doc-comment: {}", comment);
 135 }
 136
 137 fn read_to_eol(rdr: &mut StringReader) -> ~str {
 138     let mut val = StrBuf::new();
 139     while !rdr.curr_is('\n') && !is_eof(rdr) {
 140         val.push_char(rdr.curr.unwrap());
 141         bump(rdr);
 142     }
 143     if rdr.curr_is('\n') { bump(rdr); }
 144     return val.into_owned();
 145 }
 146
 147 fn read_one_line_comment(rdr: &mut StringReader) -> ~str {
 148     let val = read_to_eol(rdr);
 149     assert!((val[0] == '/' as u8 && val[1] == '/' as u8) ||
 150                  (val[0] == '#' as u8 && val[1] == '!' as u8));
 151     return val;
 152 }
 153
 154 fn consume_non_eol_whitespace(rdr: &mut StringReader) {
 155     while is_whitespace(rdr.curr) && !rdr.curr_is('\n') && !is_eof(rdr) {
 156         bump(rdr);
 157     }
 158 }
 159
 160 fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
 161     debug!(">>> blank-line comment");
 162     comments.push(Comment {
 163         style: BlankLine,
 164         lines: Vec::new(),
 165         pos: rdr.last_pos,
 166     });
 167 }
 168
 169 fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
 170                                            comments: &mut Vec<Comment>) {
 171     while is_whitespace(rdr.curr) && !is_eof(rdr) {
 172         if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
 173             push_blank_line_comment(rdr, &mut *comments);
 174         }
 175         bump(rdr);
 176     }
 177 }
 178
 179
 180 fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
 181                         comments: &mut Vec<Comment>) {
 182     debug!(">>> shebang comment");
 183     let p = rdr.last_pos;
 184     debug!("<<< shebang comment");
 185     comments.push(Comment {
 186         style: if code_to_the_left { Trailing } else { Isolated },
 187         lines: vec!(read_one_line_comment(rdr)),
 188         pos: p
 189     });
 190 }
 191
 192 fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
 193                       comments: &mut Vec<Comment>) {
 194     debug!(">>> line comments");
 195     let p = rdr.last_pos;
 196     let mut lines: Vec<~str> = Vec::new();
 197     while rdr.curr_is('/') && nextch_is(rdr, '/') {
 198         let line = read_one_line_comment(rdr);
 199         debug!("{}", line);
 200         if is_doc_comment(line) { // doc-comments are not put in comments
 201             break;
 202         }
 203         lines.push(line);
 204         consume_non_eol_whitespace(rdr);
 205     }
 206     debug!("<<< line comments");
 207     if !lines.is_empty() {
 208         comments.push(Comment {
 209             style: if code_to_the_left { Trailing } else { Isolated },
 210             lines: lines,
 211             pos: p
 212         });
 213     }
 214 }
 215
 216 // Returns None if the first col chars of s contain a non-whitespace char.
 217 // Otherwise returns Some(k) where k is first char offset after that leading
 218 // whitespace.  Note k may be outside bounds of s.
 219 fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
 220     let len = s.len();
 221     let mut col = col.to_uint();
 222     let mut cursor: uint = 0;
 223     while col > 0 && cursor < len {
 224         let r: str::CharRange = s.char_range_at(cursor);
 225         if !r.ch.is_whitespace() {
 226             return None;
 227         }
 228         cursor = r.next;
 229         col -= 1;
 230     }
 231     return Some(cursor);
 232 }
 233
 234 fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<~str> ,
 235                                         s: ~str, col: CharPos) {
 236     let len = s.len();
 237     let s1 = match all_whitespace(s, col) {
 238         Some(col) => {
 239             if col < len {
 240                 s.slice(col, len).to_owned()
 241             } else {  "".to_owned() }
 242         }
 243         None => s,
 244     };
 245     debug!("pushing line: {}", s1);
 246     lines.push(s1);
 247 }
 248
/// Consumes a `/* ... */` comment starting at the reader's current position
/// and, unless it turns out to be a doc comment, appends it to `comments`.
///
/// `code_to_the_left` says whether code precedes the comment on its line; it
/// selects `Trailing` over `Isolated`.  A single-line comment that is followed
/// by more content on the same line is recorded as `Mixed` instead.
///
/// Hitting end of input inside an ordinary (non doc-style) block comment is
/// reported through `rdr.fatal`.
fn read_block_comment(rdr: &mut StringReader,
                      code_to_the_left: bool,
                      comments: &mut Vec<Comment> ) {
    debug!(">>> block comment");
    let p = rdr.last_pos;
    let mut lines: Vec<~str> = Vec::new();
    // Column at which the comment opens; continuation lines have up to this
    // much leading whitespace stripped.
    let col = rdr.col;
    // Step over the opening "/*" (re-added to `curr_line` just below).
    bump(rdr);
    bump(rdr);

    let mut curr_line = StrBuf::from_str("/*");

    // doc-comments are not really comments, they are attributes
    // This branch handles openers of the form "/**" (but not "/***") and "/*!".
    if (rdr.curr_is('*') && !nextch_is(rdr, '*')) || rdr.curr_is('!') {
        // Copy everything up to the first "*/"; nesting is not tracked here.
        while !(rdr.curr_is('*') && nextch_is(rdr, '/')) && !is_eof(rdr) {
            curr_line.push_char(rdr.curr.unwrap());
            bump(rdr);
        }
        if !is_eof(rdr) {
            curr_line.push_str("*/");
            bump(rdr);
            bump(rdr);
        }
        // A genuine doc comment is consumed but not recorded.
        if !is_block_non_doc_comment(curr_line.as_slice()) {
            return
        }
        // What remains is expected to fit on a single line.
        assert!(!curr_line.as_slice().contains_char('\n'));
        lines.push(curr_line.into_owned());
    } else {
        // Ordinary block comment: track nesting depth until the outermost
        // "*/" is reached, splitting the text into lines as we go.
        let mut level: int = 1;
        while level > 0 {
            debug!("=== block comment level {}", level);
            if is_eof(rdr) {
                // NOTE(review): the code below unwraps `rdr.curr`, so `fatal`
                // is presumably expected not to return -- confirm.
                rdr.fatal("unterminated block comment".to_owned());
            }
            if rdr.curr_is('\n') {
                // End of a line: store it and start a fresh buffer.
                trim_whitespace_prefix_and_push_line(&mut lines,
                                                     curr_line.into_owned(),
                                                     col);
                curr_line = StrBuf::new();
                bump(rdr);
            } else {
                curr_line.push_char(rdr.curr.unwrap());
                if rdr.curr_is('/') && nextch_is(rdr, '*') {
                    // Nested opener: the '/' was pushed above, add the '*'.
                    bump(rdr);
                    bump(rdr);
                    curr_line.push_char('*');
                    level += 1;
                } else {
                    if rdr.curr_is('*') && nextch_is(rdr, '/') {
                        // Closer: the '*' was pushed above, add the '/'.
                        bump(rdr);
                        bump(rdr);
                        curr_line.push_char('/');
                        level -= 1;
                    } else { bump(rdr); }
                }
            }
        }
        // Flush the final line, which ends with "*/" rather than '\n'.
        if curr_line.len() != 0 {
            trim_whitespace_prefix_and_push_line(&mut lines,
                                                 curr_line.into_owned(),
                                                 col);
        }
    }

    let mut style = if code_to_the_left { Trailing } else { Isolated };
    consume_non_eol_whitespace(rdr);
    // More content after a one-line comment on the same line => Mixed.
    if !is_eof(rdr) && !rdr.curr_is('\n') && lines.len() == 1u {
        style = Mixed;
    }
    debug!("<<< block comment");
    comments.push(Comment {style: style, lines: lines, pos: p});
}
 322
 323 fn peeking_at_comment(rdr: &StringReader) -> bool {
 324     return (rdr.curr_is('/') && nextch_is(rdr, '/')) ||
 325          (rdr.curr_is('/') && nextch_is(rdr, '*')) ||
 326          // consider shebangs comments, but not inner attributes
 327          (rdr.curr_is('#') && nextch_is(rdr, '!') &&
 328           !lexer::nextnextch_is(rdr, '['));
 329 }
 330
 331 fn consume_comment(rdr: &mut StringReader,
 332                    code_to_the_left: bool,
 333                    comments: &mut Vec<Comment> ) {
 334     debug!(">>> consume comment");
 335     if rdr.curr_is('/') && nextch_is(rdr, '/') {
 336         read_line_comments(rdr, code_to_the_left, comments);
 337     } else if rdr.curr_is('/') && nextch_is(rdr, '*') {
 338         read_block_comment(rdr, code_to_the_left, comments);
 339     } else if rdr.curr_is('#') && nextch_is(rdr, '!') {
 340         read_shebang_comment(rdr, code_to_the_left, comments);
 341     } else { fail!(); }
 342     debug!("<<< consume comment");
 343 }
 344
/// A literal token recorded while scanning source text.
#[deriving(Clone)]
pub struct Literal {
    /// Source text for the literal, as handed back by `with_str_from`.
    pub lit: ~str,
    /// Low end (`lo`) of the literal token's span.
    pub pos: BytePos,
}
 350
 351 // it appears this function is called only from pprust... that's
 352 // probably not a good thing.
 353 pub fn gather_comments_and_literals(span_diagnostic:
 354                                         &diagnostic::SpanHandler,
 355                                     path: ~str,
 356                                     srdr: &mut io::Reader)
 357                                  -> (Vec<Comment>, Vec<Literal>) {
 358     let src = srdr.read_to_end().unwrap();
 359     let src = str::from_utf8(src.as_slice()).unwrap().to_owned();
 360     let cm = CodeMap::new();
 361     let filemap = cm.new_filemap(path, src);
 362     let mut rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap);
 363
 364     let mut comments: Vec<Comment> = Vec::new();
 365     let mut literals: Vec<Literal> = Vec::new();
 366     let mut first_read: bool = true;
 367     while !is_eof(&rdr) {
 368         loop {
 369             let mut code_to_the_left = !first_read;
 370             consume_non_eol_whitespace(&mut rdr);
 371             if rdr.curr_is('\n') {
 372                 code_to_the_left = false;
 373                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
 374             }
 375             while peeking_at_comment(&rdr) {
 376                 consume_comment(&mut rdr, code_to_the_left, &mut comments);
 377                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
 378             }
 379             break;
 380         }
 381
 382
 383         let bstart = rdr.last_pos;
 384         rdr.next_token();
 385         //discard, and look ahead; we're working with internal state
 386         let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
 387         if token::is_lit(&tok) {
 388             with_str_from(&rdr, bstart, |s| {
 389                 debug!("tok lit: {}", s);
 390                 literals.push(Literal {lit: s.to_owned(), pos: sp.lo});
 391             })
 392         } else {
 393             debug!("tok: {}", token::to_str(&tok));
 394         }
 395         first_read = false;
 396     }
 397
 398     (comments, literals)
 399 }
 400
 401 #[cfg(test)]
 402 mod test {
 403     use super::*;
 404
 405     #[test] fn test_block_doc_comment_1() {
 406         let comment = "/**\n * Test \n **  Test\n *   Test\n*/";
 407         let stripped = strip_doc_comment_decoration(comment);
 408         assert_eq!(stripped, " Test \n*  Test\n   Test".to_owned());
 409     }
 410
 411     #[test] fn test_block_doc_comment_2() {
 412         let comment = "/**\n * Test\n *  Test\n*/";
 413         let stripped = strip_doc_comment_decoration(comment);
 414         assert_eq!(stripped, " Test\n  Test".to_owned());
 415     }
 416
 417     #[test] fn test_block_doc_comment_3() {
 418         let comment = "/**\n let a: *int;\n *a = 5;\n*/";
 419         let stripped = strip_doc_comment_decoration(comment);
 420         assert_eq!(stripped, " let a: *int;\n *a = 5;".to_owned());
 421     }
 422
 423     #[test] fn test_block_doc_comment_4() {
 424         let comment = "/*******************\n test\n *********************/";
 425         let stripped = strip_doc_comment_decoration(comment);
 426         assert_eq!(stripped, " test".to_owned());
 427     }
 428
 429     #[test] fn test_line_doc_comment() {
 430         let stripped = strip_doc_comment_decoration("/// test");
 431         assert_eq!(stripped, " test".to_owned());
 432         let stripped = strip_doc_comment_decoration("///! test");
 433         assert_eq!(stripped, " test".to_owned());
 434         let stripped = strip_doc_comment_decoration("// test");
 435         assert_eq!(stripped, " test".to_owned());
 436         let stripped = strip_doc_comment_decoration("// test");
 437         assert_eq!(stripped, " test".to_owned());
 438         let stripped = strip_doc_comment_decoration("///test");
 439         assert_eq!(stripped, "test".to_owned());
 440         let stripped = strip_doc_comment_decoration("///!test");
 441         assert_eq!(stripped, "test".to_owned());
 442         let stripped = strip_doc_comment_decoration("//test");
 443         assert_eq!(stripped, "test".to_owned());
 444     }
 445 }