src/libsyntax/parse/lexer/comments.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 pub use self::CommentStyle::*;
  12
  13 use ast;
  14 use codemap::{BytePos, CharPos, CodeMap, Pos};
  15 use errors;
  16 use parse::lexer::is_block_doc_comment;
  17 use parse::lexer::{StringReader, TokenAndSpan};
  18 use parse::lexer::{is_whitespace, Reader};
  19 use parse::lexer;
  20 use print::pprust;
  21 use str::char_at;
  22
  23 use std::io::Read;
  24 use std::usize;
  25
  26 #[derive(Clone, Copy, PartialEq)]
  27 pub enum CommentStyle {
  28     /// No code on either side of each line of the comment
  29     Isolated,
  30     /// Code exists to the left of the comment
  31     Trailing,
  32     /// Code before /* foo */ and after the comment
  33     Mixed,
  34     /// Just a manual blank line "\n\n", for layout
  35     BlankLine,
  36 }
  37
/// One comment (or blank-line marker) gathered from the source text.
#[derive(Clone)]
pub struct Comment {
    /// Placement of the comment relative to the surrounding code.
    pub style: CommentStyle,
    /// The comment text, one entry per source line; empty for `BlankLine`.
    pub lines: Vec<String>,
    /// The reader's `last_pos` captured when the comment was encountered.
    pub pos: BytePos,
}
  44
  45 pub fn is_doc_comment(s: &str) -> bool {
  46     (s.starts_with("///") && super::is_doc_comment(s)) ||
  47     s.starts_with("//!") ||
  48     (s.starts_with("/**") && is_block_doc_comment(s)) ||
  49     s.starts_with("/*!")
  50 }
  51
  52 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
  53     assert!(is_doc_comment(comment));
  54     if comment.starts_with("//!") || comment.starts_with("/*!") {
  55         ast::AttrStyle::Inner
  56     } else {
  57         ast::AttrStyle::Outer
  58     }
  59 }
  60
  61 pub fn strip_doc_comment_decoration(comment: &str) -> String {
  62     /// remove whitespace-only lines from the start/end of lines
  63     fn vertical_trim(lines: Vec<String>) -> Vec<String> {
  64         let mut i = 0;
  65         let mut j = lines.len();
  66         // first line of all-stars should be omitted
  67         if !lines.is_empty() &&
  68                 lines[0].chars().all(|c| c == '*') {
  69             i += 1;
  70         }
  71         while i < j && lines[i].trim().is_empty() {
  72             i += 1;
  73         }
  74         // like the first, a last line of all stars should be omitted
  75         if j > i && lines[j - 1]
  76                          .chars()
  77                          .skip(1)
  78                          .all(|c| c == '*') {
  79             j -= 1;
  80         }
  81         while j > i && lines[j - 1].trim().is_empty() {
  82             j -= 1;
  83         }
  84         lines[i..j].iter().cloned().collect()
  85     }
  86
  87     /// remove a "[ \t]*\*" block from each line, if possible
  88     fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
  89         let mut i = usize::MAX;
  90         let mut can_trim = true;
  91         let mut first = true;
  92         for line in &lines {
  93             for (j, c) in line.chars().enumerate() {
  94                 if j > i || !"* \t".contains(c) {
  95                     can_trim = false;
  96                     break;
  97                 }
  98                 if c == '*' {
  99                     if first {
 100                         i = j;
 101                         first = false;
 102                     } else if i != j {
 103                         can_trim = false;
 104                     }
 105                     break;
 106                 }
 107             }
 108             if i > line.len() {
 109                 can_trim = false;
 110             }
 111             if !can_trim {
 112                 break;
 113             }
 114         }
 115
 116         if can_trim {
 117             lines.iter().map(|line| {
 118                 (&line[i + 1..line.len()]).to_string()
 119             }).collect()
 120         } else {
 121             lines
 122         }
 123     }
 124
 125     // one-line comments lose their prefix
 126     const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
 127     for prefix in ONELINERS {
 128         if comment.starts_with(*prefix) {
 129             return (&comment[prefix.len()..]).to_string();
 130         }
 131     }
 132
 133     if comment.starts_with("/*") {
 134         let lines = comment[3..comment.len() - 2]
 135             .lines()
 136             .map(|s| s.to_string())
 137             .collect::<Vec<String> >();
 138
 139         let lines = vertical_trim(lines);
 140         let lines = horizontal_trim(lines);
 141
 142         return lines.join("\n");
 143     }
 144
 145     panic!("not a doc-comment: {}", comment);
 146 }
 147
 148 fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
 149     debug!(">>> blank-line comment");
 150     comments.push(Comment {
 151         style: BlankLine,
 152         lines: Vec::new(),
 153         pos: rdr.last_pos,
 154     });
 155 }
 156
 157 fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
 158                                            comments: &mut Vec<Comment>) {
 159     while is_whitespace(rdr.curr) && !rdr.is_eof() {
 160         if rdr.col == CharPos(0) && rdr.curr_is('\n') {
 161             push_blank_line_comment(rdr, &mut *comments);
 162         }
 163         rdr.bump();
 164     }
 165 }
 166
 167
 168 fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
 169                         comments: &mut Vec<Comment>) {
 170     debug!(">>> shebang comment");
 171     let p = rdr.last_pos;
 172     debug!("<<< shebang comment");
 173     comments.push(Comment {
 174         style: if code_to_the_left { Trailing } else { Isolated },
 175         lines: vec!(rdr.read_one_line_comment()),
 176         pos: p
 177     });
 178 }
 179
 180 fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
 181                       comments: &mut Vec<Comment>) {
 182     debug!(">>> line comments");
 183     let p = rdr.last_pos;
 184     let mut lines: Vec<String> = Vec::new();
 185     while rdr.curr_is('/') && rdr.nextch_is('/') {
 186         let line = rdr.read_one_line_comment();
 187         debug!("{}", line);
 188         // Doc comments are not put in comments.
 189         if is_doc_comment(&line[..]) {
 190             break;
 191         }
 192         lines.push(line);
 193         rdr.consume_non_eol_whitespace();
 194     }
 195     debug!("<<< line comments");
 196     if !lines.is_empty() {
 197         comments.push(Comment {
 198             style: if code_to_the_left { Trailing } else { Isolated },
 199             lines: lines,
 200             pos: p
 201         });
 202     }
 203 }
 204
 205 /// Returns None if the first col chars of s contain a non-whitespace char.
 206 /// Otherwise returns Some(k) where k is first char offset after that leading
 207 /// whitespace.  Note k may be outside bounds of s.
 208 fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
 209     let len = s.len();
 210     let mut col = col.to_usize();
 211     let mut cursor: usize = 0;
 212     while col > 0 && cursor < len {
 213         let ch = char_at(s, cursor);
 214         if !ch.is_whitespace() {
 215             return None;
 216         }
 217         cursor += ch.len_utf8();
 218         col -= 1;
 219     }
 220     return Some(cursor);
 221 }
 222
 223 fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String> ,
 224                                         s: String, col: CharPos) {
 225     let len = s.len();
 226     let s1 = match all_whitespace(&s[..], col) {
 227         Some(col) => {
 228             if col < len {
 229                 (&s[col..len]).to_string()
 230             } else {
 231                 "".to_string()
 232             }
 233         }
 234         None => s,
 235     };
 236     debug!("pushing line: {}", s1);
 237     lines.push(s1);
 238 }
 239
 240 fn read_block_comment(rdr: &mut StringReader,
 241                       code_to_the_left: bool,
 242                       comments: &mut Vec<Comment> ) {
 243     debug!(">>> block comment");
 244     let p = rdr.last_pos;
 245     let mut lines: Vec<String> = Vec::new();
 246     let col = rdr.col;
 247     rdr.bump();
 248     rdr.bump();
 249
 250     let mut curr_line = String::from("/*");
 251
 252     // doc-comments are not really comments, they are attributes
 253     if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') {
 254         while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
 255             curr_line.push(rdr.curr.unwrap());
 256             rdr.bump();
 257         }
 258         if !rdr.is_eof() {
 259             curr_line.push_str("*/");
 260             rdr.bump();
 261             rdr.bump();
 262         }
 263         if is_block_doc_comment(&curr_line[..]) {
 264             return
 265         }
 266         assert!(!curr_line.contains('\n'));
 267         lines.push(curr_line);
 268     } else {
 269         let mut level: isize = 1;
 270         while level > 0 {
 271             debug!("=== block comment level {}", level);
 272             if rdr.is_eof() {
 273                 panic!(rdr.fatal("unterminated block comment"));
 274             }
 275             if rdr.curr_is('\n') {
 276                 trim_whitespace_prefix_and_push_line(&mut lines,
 277                                                      curr_line,
 278                                                      col);
 279                 curr_line = String::new();
 280                 rdr.bump();
 281             } else {
 282                 curr_line.push(rdr.curr.unwrap());
 283                 if rdr.curr_is('/') && rdr.nextch_is('*') {
 284                     rdr.bump();
 285                     rdr.bump();
 286                     curr_line.push('*');
 287                     level += 1;
 288                 } else {
 289                     if rdr.curr_is('*') && rdr.nextch_is('/') {
 290                         rdr.bump();
 291                         rdr.bump();
 292                         curr_line.push('/');
 293                         level -= 1;
 294                     } else { rdr.bump(); }
 295                 }
 296             }
 297         }
 298         if !curr_line.is_empty() {
 299             trim_whitespace_prefix_and_push_line(&mut lines,
 300                                                  curr_line,
 301                                                  col);
 302         }
 303     }
 304
 305     let mut style = if code_to_the_left { Trailing } else { Isolated };
 306     rdr.consume_non_eol_whitespace();
 307     if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1 {
 308         style = Mixed;
 309     }
 310     debug!("<<< block comment");
 311     comments.push(Comment {style: style, lines: lines, pos: p});
 312 }
 313
 314
 315 fn consume_comment(rdr: &mut StringReader,
 316                    code_to_the_left: bool,
 317                    comments: &mut Vec<Comment> ) {
 318     debug!(">>> consume comment");
 319     if rdr.curr_is('/') && rdr.nextch_is('/') {
 320         read_line_comments(rdr, code_to_the_left, comments);
 321     } else if rdr.curr_is('/') && rdr.nextch_is('*') {
 322         read_block_comment(rdr, code_to_the_left, comments);
 323     } else if rdr.curr_is('#') && rdr.nextch_is('!') {
 324         read_shebang_comment(rdr, code_to_the_left, comments);
 325     } else { panic!(); }
 326     debug!("<<< consume comment");
 327 }
 328
/// The source text of a literal token together with its position.
#[derive(Clone)]
pub struct Literal {
    /// Source text of the literal, as handed back by the reader.
    pub lit: String,
    /// Low end (`lo`) of the literal token's span.
    pub pos: BytePos,
}
 334
 335 // it appears this function is called only from pprust... that's
 336 // probably not a good thing.
 337 pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler,
 338                                     path: String,
 339                                     srdr: &mut Read)
 340                                  -> (Vec<Comment>, Vec<Literal>) {
 341     let mut src = Vec::new();
 342     srdr.read_to_end(&mut src).unwrap();
 343     let src = String::from_utf8(src).unwrap();
 344     let cm = CodeMap::new();
 345     let filemap = cm.new_filemap(path, src);
 346     let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap);
 347
 348     let mut comments: Vec<Comment> = Vec::new();
 349     let mut literals: Vec<Literal> = Vec::new();
 350     let mut first_read: bool = true;
 351     while !rdr.is_eof() {
 352         loop {
 353             let mut code_to_the_left = !first_read;
 354             rdr.consume_non_eol_whitespace();
 355             if rdr.curr_is('\n') {
 356                 code_to_the_left = false;
 357                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
 358             }
 359             while rdr.peeking_at_comment() {
 360                 consume_comment(&mut rdr, code_to_the_left, &mut comments);
 361                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
 362             }
 363             break;
 364         }
 365
 366
 367         let bstart = rdr.last_pos;
 368         rdr.next_token();
 369         //discard, and look ahead; we're working with internal state
 370         let TokenAndSpan { tok, sp } = rdr.peek();
 371         if tok.is_lit() {
 372             rdr.with_str_from(bstart, |s| {
 373                 debug!("tok lit: {}", s);
 374                 literals.push(Literal {lit: s.to_string(), pos: sp.lo});
 375             })
 376         } else {
 377             debug!("tok: {}", pprust::token_to_string(&tok));
 378         }
 379         first_read = false;
 380     }
 381
 382     (comments, literals)
 383 }
 384
 385 #[cfg(test)]
 386 mod tests {
 387     use super::*;
 388
 389     #[test] fn test_block_doc_comment_1() {
 390         let comment = "/**\n * Test \n **  Test\n *   Test\n*/";
 391         let stripped = strip_doc_comment_decoration(comment);
 392         assert_eq!(stripped, " Test \n*  Test\n   Test");
 393     }
 394
 395     #[test] fn test_block_doc_comment_2() {
 396         let comment = "/**\n * Test\n *  Test\n*/";
 397         let stripped = strip_doc_comment_decoration(comment);
 398         assert_eq!(stripped, " Test\n  Test");
 399     }
 400
 401     #[test] fn test_block_doc_comment_3() {
 402         let comment = "/**\n let a: *i32;\n *a = 5;\n*/";
 403         let stripped = strip_doc_comment_decoration(comment);
 404         assert_eq!(stripped, " let a: *i32;\n *a = 5;");
 405     }
 406
 407     #[test] fn test_block_doc_comment_4() {
 408         let comment = "/*******************\n test\n *********************/";
 409         let stripped = strip_doc_comment_decoration(comment);
 410         assert_eq!(stripped, " test");
 411     }
 412
 413     #[test] fn test_line_doc_comment() {
 414         let stripped = strip_doc_comment_decoration("/// test");
 415         assert_eq!(stripped, " test");
 416         let stripped = strip_doc_comment_decoration("///! test");
 417         assert_eq!(stripped, " test");
 418         let stripped = strip_doc_comment_decoration("// test");
 419         assert_eq!(stripped, " test");
 420         let stripped = strip_doc_comment_decoration("// test");
 421         assert_eq!(stripped, " test");
 422         let stripped = strip_doc_comment_decoration("///test");
 423         assert_eq!(stripped, "test");
 424         let stripped = strip_doc_comment_decoration("///!test");
 425         assert_eq!(stripped, "test");
 426         let stripped = strip_doc_comment_decoration("//test");
 427         assert_eq!(stripped, "test");
 428     }
 429 }