src/comment.rs

   1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 // Format comments.
  12
  13 use std::iter;
  14
  15 use Indent;
  16 use config::Config;
  17 use string::{StringFormat, rewrite_string};
  18
  19 pub fn rewrite_comment(orig: &str,
  20                        block_style: bool,
  21                        width: usize,
  22                        offset: Indent,
  23                        config: &Config)
  24                        -> Option<String> {
  25     let s = orig.trim();
  26
  27     // Edge case: block comments. Let's not trim their lines (for now).
  28     let (opener, closer, line_start) = if block_style {
  29         ("/* ", " */", " * ")
  30     } else if orig.starts_with("///") {
  31         ("/// ", "", "/// ")
  32     } else if orig.starts_with("//!") {
  33         ("//! ", "", "//! ")
  34     } else {
  35         ("// ", "", "// ")
  36     };
  37
  38     let max_chars = width.checked_sub(closer.len() + opener.len()).unwrap_or(1);
  39
  40     let fmt = StringFormat {
  41         opener: "",
  42         closer: "",
  43         line_start: line_start,
  44         line_end: "",
  45         width: max_chars,
  46         offset: offset + (opener.len() - line_start.len()),
  47         trim_end: true,
  48         config: config,
  49     };
  50
  51     let indent_str = offset.to_string(config);
  52     let line_breaks = s.chars().filter(|&c| c == '\n').count();
  53
  54     let lines = s.lines()
  55                  .enumerate()
  56                  .map(|(i, mut line)| {
  57                      line = line.trim();
  58                      // Drop old closer.
  59                      if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
  60                          line = &line[..(line.len() - 2)];
  61                      }
  62
  63                      line.trim_right()
  64                  })
  65                  .map(left_trim_comment_line)
  66                  .map(|line| {
  67                      if line_breaks == 0 {
  68                          line.trim_left()
  69                      } else {
  70                          line
  71                      }
  72                  });
  73
  74     let mut result = opener.to_owned();
  75     let mut first = true;
  76
  77     for line in lines {
  78         if !first {
  79             result.push('\n');
  80             result.push_str(&indent_str);
  81             result.push_str(line_start);
  82         }
  83
  84         if config.wrap_comments && line.len() > max_chars {
  85             let rewrite = try_opt!(rewrite_string(line, &fmt));
  86             result.push_str(&rewrite);
  87         } else {
  88             if line.len() == 0 {
  89                 // Remove space if this is an empty comment or a doc comment.
  90                 result.pop();
  91             }
  92             result.push_str(line);
  93         }
  94
  95         first = false;
  96     }
  97
  98     result.push_str(closer);
  99
 100     Some(result)
 101 }
 102
 103 fn left_trim_comment_line(line: &str) -> &str {
 104     if line.starts_with("//! ") || line.starts_with("/// ") {
 105         &line[4..]
 106     } else if line.starts_with("/* ") || line.starts_with("// ") || line.starts_with("//!") ||
 107        line.starts_with("///") {
 108         &line[3..]
 109     } else if line.starts_with("/*") || line.starts_with("* ") || line.starts_with("//") {
 110         &line[2..]
 111     } else if line.starts_with("*") {
 112         &line[1..]
 113     } else {
 114         line
 115     }
 116 }
 117
 118 pub trait FindUncommented {
 119     fn find_uncommented(&self, pat: &str) -> Option<usize>;
 120 }
 121
 122 impl FindUncommented for str {
 123     fn find_uncommented(&self, pat: &str) -> Option<usize> {
 124         let mut needle_iter = pat.chars();
 125         for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
 126             match needle_iter.next() {
 127                 None => {
 128                     return Some(i - pat.len());
 129                 }
 130                 Some(c) => match kind {
 131                     CodeCharKind::Normal if b == c => {}
 132                     _ => {
 133                         needle_iter = pat.chars();
 134                     }
 135                 },
 136             }
 137         }
 138
 139         // Handle case where the pattern is a suffix of the search string
 140         match needle_iter.next() {
 141             Some(_) => None,
 142             None => Some(self.len() - pat.len()),
 143         }
 144     }
 145 }
 146
 147 // Returns the first byte position after the first comment. The given string
 148 // is expected to be prefixed by a comment, including delimiters.
 149 // Good: "/* /* inner */ outer */ code();"
 150 // Bad:  "code(); // hello\n world!"
 151 pub fn find_comment_end(s: &str) -> Option<usize> {
 152     let mut iter = CharClasses::new(s.char_indices());
 153     for (kind, (i, _c)) in &mut iter {
 154         if kind == CodeCharKind::Normal {
 155             return Some(i);
 156         }
 157     }
 158
 159     // Handle case where the comment ends at the end of s.
 160     if iter.status == CharClassesStatus::Normal {
 161         Some(s.len())
 162     } else {
 163         None
 164     }
 165 }
 166
 167 /// Returns true if text contains any comment.
 168 pub fn contains_comment(text: &str) -> bool {
 169     CharClasses::new(text.chars()).any(|(kind, _)| kind == CodeCharKind::Comment)
 170 }
 171
/// Iterator adaptor that pairs every item of a character stream with a
/// `CodeCharKind`, telling whether that character is part of a comment.
struct CharClasses<T>
    where T: Iterator,
          T::Item: RichChar
{
    // Underlying stream; peekable because recognising the two-character
    // comment delimiters requires looking one item ahead.
    base: iter::Peekable<T>,
    // State the scanner is in after the most recently yielded item.
    status: CharClassesStatus,
}
 179
 180 trait RichChar {
 181     fn get_char(&self) -> char;
 182 }
 183
 184 impl RichChar for char {
 185     fn get_char(&self) -> char {
 186         *self
 187     }
 188 }
 189
 190 impl RichChar for (usize, char) {
 191     fn get_char(&self) -> char {
 192         self.1
 193     }
 194 }
 195
// Internal scanner state of `CharClasses`: the construct (if any) that the
// most recently consumed character left the scanner inside of.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum CharClassesStatus {
    // Plain code, outside of any literal or comment.
    Normal,
    // Inside a string literal, i.e. after an opening '"'.
    LitString,
    // Right after a '\' inside a string literal; the next character is
    // consumed without being interpreted.
    LitStringEscape,
    // Inside a character literal, i.e. after an opening '\''.
    LitChar,
    // Right after a '\' inside a character literal; the next character is
    // consumed without being interpreted.
    LitCharEscape,
    // The u32 is the nesting deepness of the comment
    BlockComment(u32),
    // Status when the '/' has been consumed, but not yet the '*', deepness is
    // the new deepness (after the comment opening).
    BlockCommentOpening(u32),
    // Status when the '*' has been consumed, but not yet the '/', deepness is
    // the new deepness (after the comment closing).
    BlockCommentClosing(u32),
    // Inside a `//` comment; left once the terminating '\n' has been consumed.
    LineComment,
}
 213
/// Classification of a single character, or of a whole slice, of source text.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum CodeCharKind {
    /// Functional text. This includes the contents of string and character
    /// literals, even when they look like comments.
    Normal,
    /// Part of a line or block comment, delimiters included.
    Comment,
}
 219
 220 impl<T> CharClasses<T> where T: Iterator, T::Item: RichChar {
 221     fn new(base: T) -> CharClasses<T> {
 222         CharClasses {
 223             base: base.peekable(),
 224             status: CharClassesStatus::Normal,
 225         }
 226     }
 227 }
 228
 229 impl<T> Iterator for CharClasses<T> where T: Iterator, T::Item: RichChar {
 230     type Item = (CodeCharKind, T::Item);
 231
 232     fn next(&mut self) -> Option<(CodeCharKind, T::Item)> {
 233         let item = try_opt!(self.base.next());
 234         let chr = item.get_char();
 235         self.status = match self.status {
 236             CharClassesStatus::LitString => match chr {
 237                 '"' => CharClassesStatus::Normal,
 238                 '\\' => CharClassesStatus::LitStringEscape,
 239                 _ => CharClassesStatus::LitString,
 240             },
 241             CharClassesStatus::LitStringEscape => CharClassesStatus::LitString,
 242             CharClassesStatus::LitChar => match chr {
 243                 '\\' => CharClassesStatus::LitCharEscape,
 244                 '\'' => CharClassesStatus::Normal,
 245                 _ => CharClassesStatus::LitChar,
 246             },
 247             CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
 248             CharClassesStatus::Normal => {
 249                 match chr {
 250                     '"' => CharClassesStatus::LitString,
 251                     '\'' => CharClassesStatus::LitChar,
 252                     '/' => match self.base.peek() {
 253                         Some(next) if next.get_char() == '*' => {
 254                             self.status = CharClassesStatus::BlockCommentOpening(1);
 255                             return Some((CodeCharKind::Comment, item));
 256                         }
 257                         Some(next) if next.get_char() == '/' => {
 258                             self.status = CharClassesStatus::LineComment;
 259                             return Some((CodeCharKind::Comment, item));
 260                         }
 261                         _ => CharClassesStatus::Normal,
 262                     },
 263                     _ => CharClassesStatus::Normal,
 264                 }
 265             }
 266             CharClassesStatus::BlockComment(deepness) => {
 267                 if deepness == 0 {
 268                     // This is the closing '/'
 269                     assert_eq!(chr, '/');
 270                     self.status = CharClassesStatus::Normal;
 271                     return Some((CodeCharKind::Comment, item));
 272                 }
 273                 self.status = match self.base.peek() {
 274                     Some(next) if next.get_char() == '/' && chr == '*' =>
 275                         CharClassesStatus::BlockCommentClosing(deepness - 1),
 276                     Some(next) if next.get_char() == '*' && chr == '/' =>
 277                         CharClassesStatus::BlockCommentOpening(deepness + 1),
 278                     _ => CharClassesStatus::BlockComment(deepness),
 279                 };
 280                 return Some((CodeCharKind::Comment, item));
 281             }
 282             CharClassesStatus::BlockCommentOpening(deepness) => {
 283                 assert_eq!(chr, '*');
 284                 self.status = CharClassesStatus::BlockComment(deepness);
 285                 return Some((CodeCharKind::Comment, item));
 286             }
 287             CharClassesStatus::BlockCommentClosing(deepness) => {
 288                 assert_eq!(chr, '/');
 289                 self.status = if deepness == 0 {
 290                     CharClassesStatus::Normal
 291                 } else {
 292                     CharClassesStatus::BlockComment(deepness)
 293                 };
 294                 return Some((CodeCharKind::Comment, item));
 295             }
 296             CharClassesStatus::LineComment => {
 297                 self.status = match chr {
 298                     '\n' => CharClassesStatus::Normal,
 299                     _ => CharClassesStatus::LineComment,
 300                 };
 301                 return Some((CodeCharKind::Comment, item));
 302             }
 303         };
 304         Some((CodeCharKind::Normal, item))
 305     }
 306 }
 307
/// Iterator over an alternating sequence of functional and commented parts of
/// a string. The first item is always a (possibly zero-length) subslice of
/// functional text. Line style comments contain their ending newlines.
///
/// Each item is `(kind, start, text)`, where `start` is the byte offset of
/// `text` within the original string.
pub struct CommentCodeSlices<'a> {
    // The complete string being split.
    slice: &'a str,
    // Kind of the slice returned most recently; the next one is the opposite.
    last_slice_kind: CodeCharKind,
    // Byte offset into `slice` just past the slice returned most recently.
    last_slice_end: usize,
}
 316
 317 impl<'a> CommentCodeSlices<'a> {
 318     pub fn new(slice: &'a str) -> CommentCodeSlices<'a> {
 319         CommentCodeSlices {
 320             slice: slice,
 321             last_slice_kind: CodeCharKind::Comment,
 322             last_slice_end: 0,
 323         }
 324     }
 325 }
 326
 327 impl<'a> Iterator for CommentCodeSlices<'a> {
 328     type Item = (CodeCharKind, usize, &'a str);
 329
 330     fn next(&mut self) -> Option<Self::Item> {
 331         if self.last_slice_end == self.slice.len() {
 332             return None;
 333         }
 334
 335         let mut sub_slice_end = self.last_slice_end;
 336         let mut first_whitespace = None;
 337         let subslice = &self.slice[self.last_slice_end..];
 338         let mut iter = CharClasses::new(subslice.char_indices());
 339
 340         for (kind, (i, c)) in &mut iter {
 341             let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal &&
 342                                        &subslice[..2] == "//" &&
 343                                        [' ', '\t'].contains(&c);
 344
 345             if is_comment_connector && first_whitespace.is_none() {
 346                 first_whitespace = Some(i);
 347             }
 348
 349             if kind == self.last_slice_kind && !is_comment_connector {
 350                 let last_index = match first_whitespace {
 351                     Some(j) => j,
 352                     None => i,
 353                 };
 354                 sub_slice_end = self.last_slice_end + last_index;
 355                 break;
 356             }
 357
 358             if !is_comment_connector {
 359                 first_whitespace = None;
 360             }
 361         }
 362
 363         if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
 364             // This was the last subslice.
 365             sub_slice_end = match first_whitespace {
 366                 Some(i) => self.last_slice_end + i,
 367                 None => self.slice.len(),
 368             };
 369         }
 370
 371         let kind = match self.last_slice_kind {
 372             CodeCharKind::Comment => CodeCharKind::Normal,
 373             CodeCharKind::Normal => CodeCharKind::Comment,
 374         };
 375         let res = (kind,
 376                    self.last_slice_end,
 377                    &self.slice[self.last_slice_end..sub_slice_end]);
 378         self.last_slice_end = sub_slice_end;
 379         self.last_slice_kind = kind;
 380
 381         Some(res)
 382     }
 383 }
 384
// Unit tests for comment rewriting, comment/code classification and
// comment-aware searching.
#[cfg(test)]
mod test {
    use super::{CharClasses, CodeCharKind, contains_comment, rewrite_comment, FindUncommented,
                CommentCodeSlices};
    use Indent;

    // The newline that terminates a line comment is still part of the
    // comment; the newline after it is code again.
    #[test]
    fn char_classes() {
        let mut iter = CharClasses::new("//\n\n".chars());

        assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
        assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
        assert_eq!((CodeCharKind::Comment, '\n'), iter.next().unwrap());
        assert_eq!((CodeCharKind::Normal, '\n'), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // A block comment in the middle of code splits it into three slices.
    #[test]
    fn comment_code_slices() {
        let input = "code(); /* test */ 1 + 1";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, "code(); "), iter.next().unwrap());
        assert_eq!((CodeCharKind::Comment, 8, "/* test */"),
                   iter.next().unwrap());
        assert_eq!((CodeCharKind::Normal, 18, " 1 + 1"), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // Input that starts with a comment still yields an (empty) code slice
    // first.
    #[test]
    fn comment_code_slices_two() {
        let input = "// comment\n    test();";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, ""), iter.next().unwrap());
        assert_eq!((CodeCharKind::Comment, 0, "// comment\n"),
                   iter.next().unwrap());
        assert_eq!((CodeCharKind::Normal, 11, "    test();"),
                   iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // Consecutive line comments separated only by blanks are merged into a
    // single comment slice.
    #[test]
    fn comment_code_slices_three() {
        let input = "1 // comment\n    // comment2\n\n";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap());
        assert_eq!((CodeCharKind::Comment, 2, "// comment\n    // comment2\n"),
                   iter.next().unwrap());
        assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // Exercises `rewrite_comment`: style conversion, wrapping, continuation
    // line indentation and trimming inside the delimiters.
    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn format_comments() {
        let mut config: ::config::Config = Default::default();
        config.wrap_comments = true;
        assert_eq!("/* test */", rewrite_comment(" //test", true, 100, Indent::new(0, 100),
                                                 &config).unwrap());
        assert_eq!("// comment\n// on a", rewrite_comment("// comment on a", false, 10,
                                                          Indent::empty(), &config).unwrap());

        assert_eq!("//  A multi line comment\n            // between args.",
                   rewrite_comment("//  A multi line comment\n             // between args.",
                                   false,
                                   60,
                                   Indent::new(0, 12),
                                   &config).unwrap());

        let input = "// comment";
        let expected =
            "/* com\n                                                                      \
             * men\n                                                                      \
             * t */";
        assert_eq!(expected, rewrite_comment(input, true, 9, Indent::new(0, 69), &config).unwrap());

        assert_eq!("/* trimmed */", rewrite_comment("/*   trimmed    */", true, 100,
                                                    Indent::new(0, 100), &config).unwrap());
    }

    // Test-only helper: returns `text` with every character that is
    // classified as part of a comment removed.
    fn uncommented(text: &str) -> String {
        CharClasses::new(text.chars())
            .filter_map(|(s, c)| {
                match s {
                    CodeCharKind::Normal => Some(c),
                    CodeCharKind::Comment => None,
                }
            })
            .collect()
    }

    // Covers nested block comments, comment openers inside line comments and
    // comment-like text inside a string literal.
    #[test]
    fn test_uncommented() {
        assert_eq!(&uncommented("abc/*...*/"), "abc");
        assert_eq!(&uncommented("// .... /* \n../* /* *** / */ */a/* // */c\n"),
                   "..ac\n");
        assert_eq!(&uncommented("abc \" /* */\" qsdf"), "abc \" /* */\" qsdf");
    }

    // An unterminated block comment counts as a comment; comment delimiters
    // inside a string literal do not.
    #[test]
    fn test_contains_comment() {
        assert_eq!(contains_comment("abc"), false);
        assert_eq!(contains_comment("abc // qsdf"), true);
        assert_eq!(contains_comment("abc /* kqsdf"), true);
        assert_eq!(contains_comment("abc \" /* */\" qsdf"), false);
    }

    // Expected offsets are byte offsets into the haystack (see the case with
    // the two-byte 'π').
    #[test]
    fn test_find_uncommented() {
        fn check(haystack: &str, needle: &str, expected: Option<usize>) {
            assert_eq!(expected, haystack.find_uncommented(needle));
        }

        check("/*/ */test", "test", Some(6));
        check("//test\ntest", "test", Some(7));
        check("/* comment only */", "whatever", None);
        check("/* comment */ some text /* more commentary */ result",
              "result",
              Some(46));
        check("sup // sup", "p", Some(2));
        check("sup", "x", None);
        check(r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9));
        check("/*sup yo? \n sup*/ sup", "p", Some(20));
        check("hel/*lohello*/lo", "hello", None);
        check("acb", "ab", None);
        check(",/*A*/ ", ",", Some(0));
        check("abc", "abc", Some(0));
        check("/* abc */", "abc", None);
        check("/**/abc/* */", "abc", Some(4));
        check("\"/* abc */\"", "abc", Some(4));
        check("\"/* abc", "abc", Some(4));
    }
}