src/libsyntax/parse/lexer/comments.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 pub use self::CommentStyle::*;
  12
  13 use ast;
  14 use codemap::{BytePos, CharPos, CodeMap, Pos};
  15 use diagnostic;
  16 use parse::lexer::{is_whitespace, Reader};
  17 use parse::lexer::{StringReader, TokenAndSpan};
  18 use parse::lexer::is_block_doc_comment;
  19 use parse::lexer;
  20 use print::pprust;
  21
  22 use std::io;
  23 use std::str;
  24 use std::string::String;
  25 use std::uint;
  26
/// Describes where a comment sits relative to the code around it.
#[derive(Clone, Copy, PartialEq)]
pub enum CommentStyle {
    /// No code on either side of each line of the comment
    Isolated,
    /// Code exists to the left of the comment
    Trailing,
    /// Code before /* foo */ and after the comment
    Mixed,
    /// Just a manual blank line "\n\n", for layout
    BlankLine,
}
  38
/// A non-doc comment (or blank-line marker) collected from the source text.
#[derive(Clone)]
pub struct Comment {
    /// Placement of the comment relative to neighbouring code.
    pub style: CommentStyle,
    /// Text of the comment, one entry per line; empty for `BlankLine`.
    pub lines: Vec<String>,
    /// The reader's `last_pos` captured when the comment was first seen.
    pub pos: BytePos,
}
  45
  46 pub fn is_doc_comment(s: &str) -> bool {
  47     (s.starts_with("///") && super::is_doc_comment(s)) ||
  48     s.starts_with("//!") ||
  49     (s.starts_with("/**") && is_block_doc_comment(s)) ||
  50     s.starts_with("/*!")
  51 }
  52
  53 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
  54     assert!(is_doc_comment(comment));
  55     if comment.starts_with("//!") || comment.starts_with("/*!") {
  56         ast::AttrInner
  57     } else {
  58         ast::AttrOuter
  59     }
  60 }
  61
  62 pub fn strip_doc_comment_decoration(comment: &str) -> String {
  63     /// remove whitespace-only lines from the start/end of lines
  64     fn vertical_trim(lines: Vec<String> ) -> Vec<String> {
  65         let mut i = 0u;
  66         let mut j = lines.len();
  67         // first line of all-stars should be omitted
  68         if lines.len() > 0 &&
  69                 lines[0].chars().all(|c| c == '*') {
  70             i += 1;
  71         }
  72         while i < j && lines[i].trim().is_empty() {
  73             i += 1;
  74         }
  75         // like the first, a last line of all stars should be omitted
  76         if j > i && lines[j - 1]
  77                          .chars()
  78                          .skip(1)
  79                          .all(|c| c == '*') {
  80             j -= 1;
  81         }
  82         while j > i && lines[j - 1].trim().is_empty() {
  83             j -= 1;
  84         }
  85         return lines[i..j].iter().map(|x| (*x).clone()).collect();
  86     }
  87
  88     /// remove a "[ \t]*\*" block from each line, if possible
  89     fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
  90         let mut i = uint::MAX;
  91         let mut can_trim = true;
  92         let mut first = true;
  93         for line in lines.iter() {
  94             for (j, c) in line.chars().enumerate() {
  95                 if j > i || !"* \t".contains_char(c) {
  96                     can_trim = false;
  97                     break;
  98                 }
  99                 if c == '*' {
 100                     if first {
 101                         i = j;
 102                         first = false;
 103                     } else if i != j {
 104                         can_trim = false;
 105                     }
 106                     break;
 107                 }
 108             }
 109             if i > line.len() {
 110                 can_trim = false;
 111             }
 112             if !can_trim {
 113                 break;
 114             }
 115         }
 116
 117         if can_trim {
 118             lines.iter().map(|line| {
 119                 line[i + 1..line.len()].to_string()
 120             }).collect()
 121         } else {
 122             lines
 123         }
 124     }
 125
 126     // one-line comments lose their prefix
 127     static ONLINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
 128     for prefix in ONLINERS.iter() {
 129         if comment.starts_with(*prefix) {
 130             return comment[prefix.len()..].to_string();
 131         }
 132     }
 133
 134     if comment.starts_with("/*") {
 135         let lines = comment[3u..comment.len() - 2u]
 136             .lines_any()
 137             .map(|s| s.to_string())
 138             .collect::<Vec<String> >();
 139
 140         let lines = vertical_trim(lines);
 141         let lines = horizontal_trim(lines);
 142
 143         return lines.connect("\n");
 144     }
 145
 146     panic!("not a doc-comment: {}", comment);
 147 }
 148
 149 fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
 150     debug!(">>> blank-line comment");
 151     comments.push(Comment {
 152         style: BlankLine,
 153         lines: Vec::new(),
 154         pos: rdr.last_pos,
 155     });
 156 }
 157
 158 fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
 159                                            comments: &mut Vec<Comment>) {
 160     while is_whitespace(rdr.curr) && !rdr.is_eof() {
 161         if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
 162             push_blank_line_comment(rdr, &mut *comments);
 163         }
 164         rdr.bump();
 165     }
 166 }
 167
 168
 169 fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
 170                         comments: &mut Vec<Comment>) {
 171     debug!(">>> shebang comment");
 172     let p = rdr.last_pos;
 173     debug!("<<< shebang comment");
 174     comments.push(Comment {
 175         style: if code_to_the_left { Trailing } else { Isolated },
 176         lines: vec!(rdr.read_one_line_comment()),
 177         pos: p
 178     });
 179 }
 180
 181 fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
 182                       comments: &mut Vec<Comment>) {
 183     debug!(">>> line comments");
 184     let p = rdr.last_pos;
 185     let mut lines: Vec<String> = Vec::new();
 186     while rdr.curr_is('/') && rdr.nextch_is('/') {
 187         let line = rdr.read_one_line_comment();
 188         debug!("{}", line);
 189         // Doc comments are not put in comments.
 190         if is_doc_comment(line[]) {
 191             break;
 192         }
 193         lines.push(line);
 194         rdr.consume_non_eol_whitespace();
 195     }
 196     debug!("<<< line comments");
 197     if !lines.is_empty() {
 198         comments.push(Comment {
 199             style: if code_to_the_left { Trailing } else { Isolated },
 200             lines: lines,
 201             pos: p
 202         });
 203     }
 204 }
 205
 206 /// Returns None if the first col chars of s contain a non-whitespace char.
 207 /// Otherwise returns Some(k) where k is first char offset after that leading
 208 /// whitespace.  Note k may be outside bounds of s.
 209 fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
 210     let len = s.len();
 211     let mut col = col.to_uint();
 212     let mut cursor: uint = 0;
 213     while col > 0 && cursor < len {
 214         let r: str::CharRange = s.char_range_at(cursor);
 215         if !r.ch.is_whitespace() {
 216             return None;
 217         }
 218         cursor = r.next;
 219         col -= 1;
 220     }
 221     return Some(cursor);
 222 }
 223
 224 fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String> ,
 225                                         s: String, col: CharPos) {
 226     let len = s.len();
 227     let s1 = match all_whitespace(s[], col) {
 228         Some(col) => {
 229             if col < len {
 230                 s[col..len].to_string()
 231             } else {
 232                 "".to_string()
 233             }
 234         }
 235         None => s,
 236     };
 237     debug!("pushing line: {}", s1);
 238     lines.push(s1);
 239 }
 240
 241 fn read_block_comment(rdr: &mut StringReader,
 242                       code_to_the_left: bool,
 243                       comments: &mut Vec<Comment> ) {
 244     debug!(">>> block comment");
 245     let p = rdr.last_pos;
 246     let mut lines: Vec<String> = Vec::new();
 247     let col = rdr.col;
 248     rdr.bump();
 249     rdr.bump();
 250
 251     let mut curr_line = String::from_str("/*");
 252
 253     // doc-comments are not really comments, they are attributes
 254     if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') {
 255         while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
 256             curr_line.push(rdr.curr.unwrap());
 257             rdr.bump();
 258         }
 259         if !rdr.is_eof() {
 260             curr_line.push_str("*/");
 261             rdr.bump();
 262             rdr.bump();
 263         }
 264         if is_block_doc_comment(curr_line[]) {
 265             return
 266         }
 267         assert!(!curr_line.contains_char('\n'));
 268         lines.push(curr_line);
 269     } else {
 270         let mut level: int = 1;
 271         while level > 0 {
 272             debug!("=== block comment level {}", level);
 273             if rdr.is_eof() {
 274                 rdr.fatal("unterminated block comment");
 275             }
 276             if rdr.curr_is('\n') {
 277                 trim_whitespace_prefix_and_push_line(&mut lines,
 278                                                      curr_line,
 279                                                      col);
 280                 curr_line = String::new();
 281                 rdr.bump();
 282             } else {
 283                 curr_line.push(rdr.curr.unwrap());
 284                 if rdr.curr_is('/') && rdr.nextch_is('*') {
 285                     rdr.bump();
 286                     rdr.bump();
 287                     curr_line.push('*');
 288                     level += 1;
 289                 } else {
 290                     if rdr.curr_is('*') && rdr.nextch_is('/') {
 291                         rdr.bump();
 292                         rdr.bump();
 293                         curr_line.push('/');
 294                         level -= 1;
 295                     } else { rdr.bump(); }
 296                 }
 297             }
 298         }
 299         if curr_line.len() != 0 {
 300             trim_whitespace_prefix_and_push_line(&mut lines,
 301                                                  curr_line,
 302                                                  col);
 303         }
 304     }
 305
 306     let mut style = if code_to_the_left { Trailing } else { Isolated };
 307     rdr.consume_non_eol_whitespace();
 308     if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1u {
 309         style = Mixed;
 310     }
 311     debug!("<<< block comment");
 312     comments.push(Comment {style: style, lines: lines, pos: p});
 313 }
 314
 315
 316 fn consume_comment(rdr: &mut StringReader,
 317                    code_to_the_left: bool,
 318                    comments: &mut Vec<Comment> ) {
 319     debug!(">>> consume comment");
 320     if rdr.curr_is('/') && rdr.nextch_is('/') {
 321         read_line_comments(rdr, code_to_the_left, comments);
 322     } else if rdr.curr_is('/') && rdr.nextch_is('*') {
 323         read_block_comment(rdr, code_to_the_left, comments);
 324     } else if rdr.curr_is('#') && rdr.nextch_is('!') {
 325         read_shebang_comment(rdr, code_to_the_left, comments);
 326     } else { panic!(); }
 327     debug!("<<< consume comment");
 328 }
 329
/// A literal token collected from the source text.
#[derive(Clone)]
pub struct Literal {
    /// The literal exactly as it was written in the source.
    pub lit: String,
    /// Low end (`lo`) of the literal token's span.
    pub pos: BytePos,
}
 335
 336 // it appears this function is called only from pprust... that's
 337 // probably not a good thing.
 338 pub fn gather_comments_and_literals(span_diagnostic: &diagnostic::SpanHandler,
 339                                     path: String,
 340                                     srdr: &mut io::Reader)
 341                                  -> (Vec<Comment>, Vec<Literal>) {
 342     let src = srdr.read_to_end().unwrap();
 343     let src = String::from_utf8(src).unwrap();
 344     let cm = CodeMap::new();
 345     let filemap = cm.new_filemap(path, src);
 346     let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap);
 347
 348     let mut comments: Vec<Comment> = Vec::new();
 349     let mut literals: Vec<Literal> = Vec::new();
 350     let mut first_read: bool = true;
 351     while !rdr.is_eof() {
 352         loop {
 353             let mut code_to_the_left = !first_read;
 354             rdr.consume_non_eol_whitespace();
 355             if rdr.curr_is('\n') {
 356                 code_to_the_left = false;
 357                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
 358             }
 359             while rdr.peeking_at_comment() {
 360                 consume_comment(&mut rdr, code_to_the_left, &mut comments);
 361                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
 362             }
 363             break;
 364         }
 365
 366
 367         let bstart = rdr.last_pos;
 368         rdr.next_token();
 369         //discard, and look ahead; we're working with internal state
 370         let TokenAndSpan { tok, sp } = rdr.peek();
 371         if tok.is_lit() {
 372             rdr.with_str_from(bstart, |s| {
 373                 debug!("tok lit: {}", s);
 374                 literals.push(Literal {lit: s.to_string(), pos: sp.lo});
 375             })
 376         } else {
 377             debug!("tok: {}", pprust::token_to_string(&tok));
 378         }
 379         first_read = false;
 380     }
 381
 382     (comments, literals)
 383 }
 384
 385 #[cfg(test)]
 386 mod test {
 387     use super::*;
 388
 389     #[test] fn test_block_doc_comment_1() {
 390         let comment = "/**\n * Test \n **  Test\n *   Test\n*/";
 391         let stripped = strip_doc_comment_decoration(comment);
 392         assert_eq!(stripped, " Test \n*  Test\n   Test");
 393     }
 394
 395     #[test] fn test_block_doc_comment_2() {
 396         let comment = "/**\n * Test\n *  Test\n*/";
 397         let stripped = strip_doc_comment_decoration(comment);
 398         assert_eq!(stripped, " Test\n  Test");
 399     }
 400
 401     #[test] fn test_block_doc_comment_3() {
 402         let comment = "/**\n let a: *int;\n *a = 5;\n*/";
 403         let stripped = strip_doc_comment_decoration(comment);
 404         assert_eq!(stripped, " let a: *int;\n *a = 5;");
 405     }
 406
 407     #[test] fn test_block_doc_comment_4() {
 408         let comment = "/*******************\n test\n *********************/";
 409         let stripped = strip_doc_comment_decoration(comment);
 410         assert_eq!(stripped, " test");
 411     }
 412
 413     #[test] fn test_line_doc_comment() {
 414         let stripped = strip_doc_comment_decoration("/// test");
 415         assert_eq!(stripped, " test");
 416         let stripped = strip_doc_comment_decoration("///! test");
 417         assert_eq!(stripped, " test");
 418         let stripped = strip_doc_comment_decoration("// test");
 419         assert_eq!(stripped, " test");
 420         let stripped = strip_doc_comment_decoration("// test");
 421         assert_eq!(stripped, " test");
 422         let stripped = strip_doc_comment_decoration("///test");
 423         assert_eq!(stripped, "test");
 424         let stripped = strip_doc_comment_decoration("///!test");
 425         assert_eq!(stripped, "test");
 426         let stripped = strip_doc_comment_decoration("//test");
 427         assert_eq!(stripped, "test");
 428     }
 429 }