src/libsyntax/parse/lexer/comments.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 pub use self::CommentStyle::*;
  12
  13 use ast;
  14 use codemap::{BytePos, CharPos, CodeMap, Pos};
  15 use diagnostic;
  16 use parse::lexer::{is_whitespace, Reader};
  17 use parse::lexer::{StringReader, TokenAndSpan};
  18 use parse::lexer::is_block_doc_comment;
  19 use parse::lexer;
  20 use print::pprust;
  21
  22 use std::io;
  23 use std::str;
  24 use std::string::String;
  25 use std::uint;
  26
  27 #[deriving(Clone, PartialEq)]
  28 pub enum CommentStyle {
  29     /// No code on either side of each line of the comment
  30     Isolated,
  31     /// Code exists to the left of the comment
  32     Trailing,
  33     /// Code before /* foo */ and after the comment
  34     Mixed,
  35     /// Just a manual blank line "\n\n", for layout
  36     BlankLine,
  37 }
  38
  39 impl Copy for CommentStyle {}
  40
  41 #[deriving(Clone)]
  42 pub struct Comment {
  43     pub style: CommentStyle,
  44     pub lines: Vec<String>,
  45     pub pos: BytePos,
  46 }
  47
  48 pub fn is_doc_comment(s: &str) -> bool {
  49     (s.starts_with("///") && super::is_doc_comment(s)) ||
  50     s.starts_with("//!") ||
  51     (s.starts_with("/**") && is_block_doc_comment(s)) ||
  52     s.starts_with("/*!")
  53 }
  54
  55 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
  56     assert!(is_doc_comment(comment));
  57     if comment.starts_with("//!") || comment.starts_with("/*!") {
  58         ast::AttrInner
  59     } else {
  60         ast::AttrOuter
  61     }
  62 }
  63
  64 pub fn strip_doc_comment_decoration(comment: &str) -> String {
  65     /// remove whitespace-only lines from the start/end of lines
  66     fn vertical_trim(lines: Vec<String> ) -> Vec<String> {
  67         let mut i = 0u;
  68         let mut j = lines.len();
  69         // first line of all-stars should be omitted
  70         if lines.len() > 0 &&
  71                 lines[0].chars().all(|c| c == '*') {
  72             i += 1;
  73         }
  74         while i < j && lines[i].trim().is_empty() {
  75             i += 1;
  76         }
  77         // like the first, a last line of all stars should be omitted
  78         if j > i && lines[j - 1]
  79                          .chars()
  80                          .skip(1)
  81                          .all(|c| c == '*') {
  82             j -= 1;
  83         }
  84         while j > i && lines[j - 1].trim().is_empty() {
  85             j -= 1;
  86         }
  87         return lines.slice(i, j).iter().map(|x| (*x).clone()).collect();
  88     }
  89
  90     /// remove a "[ \t]*\*" block from each line, if possible
  91     fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
  92         let mut i = uint::MAX;
  93         let mut can_trim = true;
  94         let mut first = true;
  95         for line in lines.iter() {
  96             for (j, c) in line.chars().enumerate() {
  97                 if j > i || !"* \t".contains_char(c) {
  98                     can_trim = false;
  99                     break;
 100                 }
 101                 if c == '*' {
 102                     if first {
 103                         i = j;
 104                         first = false;
 105                     } else if i != j {
 106                         can_trim = false;
 107                     }
 108                     break;
 109                 }
 110             }
 111             if i > line.len() {
 112                 can_trim = false;
 113             }
 114             if !can_trim {
 115                 break;
 116             }
 117         }
 118
 119         if can_trim {
 120             lines.iter().map(|line| {
 121                 line.slice(i + 1, line.len()).to_string()
 122             }).collect()
 123         } else {
 124             lines
 125         }
 126     }
 127
 128     // one-line comments lose their prefix
 129     static ONLINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
 130     for prefix in ONLINERS.iter() {
 131         if comment.starts_with(*prefix) {
 132             return comment.slice_from(prefix.len()).to_string();
 133         }
 134     }
 135
 136     if comment.starts_with("/*") {
 137         let lines = comment.slice(3u, comment.len() - 2u)
 138             .lines_any()
 139             .map(|s| s.to_string())
 140             .collect::<Vec<String> >();
 141
 142         let lines = vertical_trim(lines);
 143         let lines = horizontal_trim(lines);
 144
 145         return lines.connect("\n");
 146     }
 147
 148     panic!("not a doc-comment: {}", comment);
 149 }
 150
 151 fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
 152     debug!(">>> blank-line comment");
 153     comments.push(Comment {
 154         style: BlankLine,
 155         lines: Vec::new(),
 156         pos: rdr.last_pos,
 157     });
 158 }
 159
 160 fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
 161                                            comments: &mut Vec<Comment>) {
 162     while is_whitespace(rdr.curr) && !rdr.is_eof() {
 163         if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
 164             push_blank_line_comment(rdr, &mut *comments);
 165         }
 166         rdr.bump();
 167     }
 168 }
 169
 170
 171 fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
 172                         comments: &mut Vec<Comment>) {
 173     debug!(">>> shebang comment");
 174     let p = rdr.last_pos;
 175     debug!("<<< shebang comment");
 176     comments.push(Comment {
 177         style: if code_to_the_left { Trailing } else { Isolated },
 178         lines: vec!(rdr.read_one_line_comment()),
 179         pos: p
 180     });
 181 }
 182
 183 fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
 184                       comments: &mut Vec<Comment>) {
 185     debug!(">>> line comments");
 186     let p = rdr.last_pos;
 187     let mut lines: Vec<String> = Vec::new();
 188     while rdr.curr_is('/') && rdr.nextch_is('/') {
 189         let line = rdr.read_one_line_comment();
 190         debug!("{}", line);
 191         // Doc comments are not put in comments.
 192         if is_doc_comment(line.as_slice()) {
 193             break;
 194         }
 195         lines.push(line);
 196         rdr.consume_non_eol_whitespace();
 197     }
 198     debug!("<<< line comments");
 199     if !lines.is_empty() {
 200         comments.push(Comment {
 201             style: if code_to_the_left { Trailing } else { Isolated },
 202             lines: lines,
 203             pos: p
 204         });
 205     }
 206 }
 207
 208 /// Returns None if the first col chars of s contain a non-whitespace char.
 209 /// Otherwise returns Some(k) where k is first char offset after that leading
 210 /// whitespace.  Note k may be outside bounds of s.
 211 fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
 212     let len = s.len();
 213     let mut col = col.to_uint();
 214     let mut cursor: uint = 0;
 215     while col > 0 && cursor < len {
 216         let r: str::CharRange = s.char_range_at(cursor);
 217         if !r.ch.is_whitespace() {
 218             return None;
 219         }
 220         cursor = r.next;
 221         col -= 1;
 222     }
 223     return Some(cursor);
 224 }
 225
 226 fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String> ,
 227                                         s: String, col: CharPos) {
 228     let len = s.len();
 229     let s1 = match all_whitespace(s.as_slice(), col) {
 230         Some(col) => {
 231             if col < len {
 232                 s.slice(col, len).to_string()
 233             } else {
 234                 "".to_string()
 235             }
 236         }
 237         None => s,
 238     };
 239     debug!("pushing line: {}", s1);
 240     lines.push(s1);
 241 }
 242
 243 fn read_block_comment(rdr: &mut StringReader,
 244                       code_to_the_left: bool,
 245                       comments: &mut Vec<Comment> ) {
 246     debug!(">>> block comment");
 247     let p = rdr.last_pos;
 248     let mut lines: Vec<String> = Vec::new();
 249     let col = rdr.col;
 250     rdr.bump();
 251     rdr.bump();
 252
 253     let mut curr_line = String::from_str("/*");
 254
 255     // doc-comments are not really comments, they are attributes
 256     if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') {
 257         while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
 258             curr_line.push(rdr.curr.unwrap());
 259             rdr.bump();
 260         }
 261         if !rdr.is_eof() {
 262             curr_line.push_str("*/");
 263             rdr.bump();
 264             rdr.bump();
 265         }
 266         if is_block_doc_comment(curr_line.as_slice()) {
 267             return
 268         }
 269         assert!(!curr_line.contains_char('\n'));
 270         lines.push(curr_line);
 271     } else {
 272         let mut level: int = 1;
 273         while level > 0 {
 274             debug!("=== block comment level {}", level);
 275             if rdr.is_eof() {
 276                 rdr.fatal("unterminated block comment");
 277             }
 278             if rdr.curr_is('\n') {
 279                 trim_whitespace_prefix_and_push_line(&mut lines,
 280                                                      curr_line,
 281                                                      col);
 282                 curr_line = String::new();
 283                 rdr.bump();
 284             } else {
 285                 curr_line.push(rdr.curr.unwrap());
 286                 if rdr.curr_is('/') && rdr.nextch_is('*') {
 287                     rdr.bump();
 288                     rdr.bump();
 289                     curr_line.push('*');
 290                     level += 1;
 291                 } else {
 292                     if rdr.curr_is('*') && rdr.nextch_is('/') {
 293                         rdr.bump();
 294                         rdr.bump();
 295                         curr_line.push('/');
 296                         level -= 1;
 297                     } else { rdr.bump(); }
 298                 }
 299             }
 300         }
 301         if curr_line.len() != 0 {
 302             trim_whitespace_prefix_and_push_line(&mut lines,
 303                                                  curr_line,
 304                                                  col);
 305         }
 306     }
 307
 308     let mut style = if code_to_the_left { Trailing } else { Isolated };
 309     rdr.consume_non_eol_whitespace();
 310     if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1u {
 311         style = Mixed;
 312     }
 313     debug!("<<< block comment");
 314     comments.push(Comment {style: style, lines: lines, pos: p});
 315 }
 316
 317
 318 fn consume_comment(rdr: &mut StringReader,
 319                    code_to_the_left: bool,
 320                    comments: &mut Vec<Comment> ) {
 321     debug!(">>> consume comment");
 322     if rdr.curr_is('/') && rdr.nextch_is('/') {
 323         read_line_comments(rdr, code_to_the_left, comments);
 324     } else if rdr.curr_is('/') && rdr.nextch_is('*') {
 325         read_block_comment(rdr, code_to_the_left, comments);
 326     } else if rdr.curr_is('#') && rdr.nextch_is('!') {
 327         read_shebang_comment(rdr, code_to_the_left, comments);
 328     } else { panic!(); }
 329     debug!("<<< consume comment");
 330 }
 331
 332 #[deriving(Clone)]
 333 pub struct Literal {
 334     pub lit: String,
 335     pub pos: BytePos,
 336 }
 337
 338 // it appears this function is called only from pprust... that's
 339 // probably not a good thing.
 340 pub fn gather_comments_and_literals(span_diagnostic: &diagnostic::SpanHandler,
 341                                     path: String,
 342                                     srdr: &mut io::Reader)
 343                                  -> (Vec<Comment>, Vec<Literal>) {
 344     let src = srdr.read_to_end().unwrap();
 345     let src = String::from_utf8(src).unwrap();
 346     let cm = CodeMap::new();
 347     let filemap = cm.new_filemap(path, src);
 348     let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap);
 349
 350     let mut comments: Vec<Comment> = Vec::new();
 351     let mut literals: Vec<Literal> = Vec::new();
 352     let mut first_read: bool = true;
 353     while !rdr.is_eof() {
 354         loop {
 355             let mut code_to_the_left = !first_read;
 356             rdr.consume_non_eol_whitespace();
 357             if rdr.curr_is('\n') {
 358                 code_to_the_left = false;
 359                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
 360             }
 361             while rdr.peeking_at_comment() {
 362                 consume_comment(&mut rdr, code_to_the_left, &mut comments);
 363                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
 364             }
 365             break;
 366         }
 367
 368
 369         let bstart = rdr.last_pos;
 370         rdr.next_token();
 371         //discard, and look ahead; we're working with internal state
 372         let TokenAndSpan { tok, sp } = rdr.peek();
 373         if tok.is_lit() {
 374             rdr.with_str_from(bstart, |s| {
 375                 debug!("tok lit: {}", s);
 376                 literals.push(Literal {lit: s.to_string(), pos: sp.lo});
 377             })
 378         } else {
 379             debug!("tok: {}", pprust::token_to_string(&tok));
 380         }
 381         first_read = false;
 382     }
 383
 384     (comments, literals)
 385 }
 386
 387 #[cfg(test)]
 388 mod test {
 389     use super::*;
 390
 391     #[test] fn test_block_doc_comment_1() {
 392         let comment = "/**\n * Test \n **  Test\n *   Test\n*/";
 393         let stripped = strip_doc_comment_decoration(comment);
 394         assert_eq!(stripped, " Test \n*  Test\n   Test");
 395     }
 396
 397     #[test] fn test_block_doc_comment_2() {
 398         let comment = "/**\n * Test\n *  Test\n*/";
 399         let stripped = strip_doc_comment_decoration(comment);
 400         assert_eq!(stripped, " Test\n  Test");
 401     }
 402
 403     #[test] fn test_block_doc_comment_3() {
 404         let comment = "/**\n let a: *int;\n *a = 5;\n*/";
 405         let stripped = strip_doc_comment_decoration(comment);
 406         assert_eq!(stripped, " let a: *int;\n *a = 5;");
 407     }
 408
 409     #[test] fn test_block_doc_comment_4() {
 410         let comment = "/*******************\n test\n *********************/";
 411         let stripped = strip_doc_comment_decoration(comment);
 412         assert_eq!(stripped, " test");
 413     }
 414
 415     #[test] fn test_line_doc_comment() {
 416         let stripped = strip_doc_comment_decoration("/// test");
 417         assert_eq!(stripped, " test");
 418         let stripped = strip_doc_comment_decoration("///! test");
 419         assert_eq!(stripped, " test");
 420         let stripped = strip_doc_comment_decoration("// test");
 421         assert_eq!(stripped, " test");
 422         let stripped = strip_doc_comment_decoration("// test");
 423         assert_eq!(stripped, " test");
 424         let stripped = strip_doc_comment_decoration("///test");
 425         assert_eq!(stripped, "test");
 426         let stripped = strip_doc_comment_decoration("///!test");
 427         assert_eq!(stripped, "test");
 428         let stripped = strip_doc_comment_decoration("//test");
 429         assert_eq!(stripped, "test");
 430     }
 431 }