src/comment.rs

   1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 // Formatting and tools for comments.
  12
  13 use std::{self, iter};
  14
  15 use syntax::codemap::Span;
  16
  17 use config::Config;
  18 use rewrite::RewriteContext;
  19 use shape::{Indent, Shape};
  20 use string::{rewrite_string, StringFormat};
  21 use utils::{count_newlines, first_line_width, last_line_width};
  22
  23 fn is_custom_comment(comment: &str) -> bool {
  24     if !comment.starts_with("//") {
  25         false
  26     } else if let Some(c) = comment.chars().nth(2) {
  27         !c.is_alphanumeric() && !c.is_whitespace()
  28     } else {
  29         false
  30     }
  31 }
  32
  33 #[derive(Copy, Clone, PartialEq, Eq)]
  34 pub enum CommentStyle<'a> {
  35     DoubleSlash,
  36     TripleSlash,
  37     Doc,
  38     SingleBullet,
  39     DoubleBullet,
  40     Exclamation,
  41     Custom(&'a str),
  42 }
  43
  44 fn custom_opener(s: &str) -> &str {
  45     s.lines().next().map_or("", |first_line| {
  46         first_line
  47             .find(' ')
  48             .map_or(first_line, |space_index| &first_line[0..space_index + 1])
  49     })
  50 }
  51
  52 impl<'a> CommentStyle<'a> {
  53     pub fn opener(&self) -> &'a str {
  54         match *self {
  55             CommentStyle::DoubleSlash => "// ",
  56             CommentStyle::TripleSlash => "/// ",
  57             CommentStyle::Doc => "//! ",
  58             CommentStyle::SingleBullet => "/* ",
  59             CommentStyle::DoubleBullet => "/** ",
  60             CommentStyle::Exclamation => "/*! ",
  61             CommentStyle::Custom(opener) => opener,
  62         }
  63     }
  64
  65     pub fn closer(&self) -> &'a str {
  66         match *self {
  67             CommentStyle::DoubleSlash
  68             | CommentStyle::TripleSlash
  69             | CommentStyle::Custom(..)
  70             | CommentStyle::Doc => "",
  71             CommentStyle::DoubleBullet => " **/",
  72             CommentStyle::SingleBullet | CommentStyle::Exclamation => " */",
  73         }
  74     }
  75
  76     pub fn line_start(&self) -> &'a str {
  77         match *self {
  78             CommentStyle::DoubleSlash => "// ",
  79             CommentStyle::TripleSlash => "/// ",
  80             CommentStyle::Doc => "//! ",
  81             CommentStyle::SingleBullet | CommentStyle::Exclamation => " * ",
  82             CommentStyle::DoubleBullet => " ** ",
  83             CommentStyle::Custom(opener) => opener,
  84         }
  85     }
  86
  87     pub fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
  88         (self.opener(), self.closer(), self.line_start())
  89     }
  90
  91     pub fn line_with_same_comment_style(&self, line: &str, normalize_comments: bool) -> bool {
  92         match *self {
  93             CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
  94                 line.trim_left().starts_with(self.line_start().trim_left())
  95                     || comment_style(line, normalize_comments) == *self
  96             }
  97             CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
  98                 line.trim_left().starts_with(self.closer().trim_left())
  99                     || line.trim_left().starts_with(self.line_start().trim_left())
 100                     || comment_style(line, normalize_comments) == *self
 101             }
 102             CommentStyle::Custom(opener) => line.trim_left().starts_with(opener.trim_right()),
 103         }
 104     }
 105 }
 106
 107 fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle {
 108     if !normalize_comments {
 109         if orig.starts_with("/**") && !orig.starts_with("/**/") {
 110             CommentStyle::DoubleBullet
 111         } else if orig.starts_with("/*!") {
 112             CommentStyle::Exclamation
 113         } else if orig.starts_with("/*") {
 114             CommentStyle::SingleBullet
 115         } else if orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/') {
 116             CommentStyle::TripleSlash
 117         } else if orig.starts_with("//!") {
 118             CommentStyle::Doc
 119         } else if is_custom_comment(orig) {
 120             CommentStyle::Custom(custom_opener(orig))
 121         } else {
 122             CommentStyle::DoubleSlash
 123         }
 124     } else if (orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/'))
 125         || (orig.starts_with("/**") && !orig.starts_with("/**/"))
 126     {
 127         CommentStyle::TripleSlash
 128     } else if orig.starts_with("//!") || orig.starts_with("/*!") {
 129         CommentStyle::Doc
 130     } else if is_custom_comment(orig) {
 131         CommentStyle::Custom(custom_opener(orig))
 132     } else {
 133         CommentStyle::DoubleSlash
 134     }
 135 }
 136
 137 pub fn combine_strs_with_missing_comments(
 138     context: &RewriteContext,
 139     prev_str: &str,
 140     next_str: &str,
 141     span: Span,
 142     shape: Shape,
 143     allow_extend: bool,
 144 ) -> Option<String> {
 145     let mut allow_one_line = !prev_str.contains('\n') && !next_str.contains('\n');
 146     let first_sep = if prev_str.is_empty() || next_str.is_empty() {
 147         ""
 148     } else {
 149         " "
 150     };
 151     let mut one_line_width =
 152         last_line_width(prev_str) + first_line_width(next_str) + first_sep.len();
 153
 154     let indent_str = shape.indent.to_string(context.config);
 155     let missing_comment = rewrite_missing_comment(span, shape, context)?;
 156
 157     if missing_comment.is_empty() {
 158         if allow_extend && prev_str.len() + first_sep.len() + next_str.len() <= shape.width {
 159             return Some(format!("{}{}{}", prev_str, first_sep, next_str));
 160         } else {
 161             let sep = if prev_str.is_empty() {
 162                 String::new()
 163             } else {
 164                 String::from("\n") + &indent_str
 165             };
 166             return Some(format!("{}{}{}", prev_str, sep, next_str));
 167         }
 168     }
 169
 170     // We have a missing comment between the first expression and the second expression.
 171
 172     // Peek the the original source code and find out whether there is a newline between the first
 173     // expression and the second expression or the missing comment. We will preserve the original
 174     // layout whenever possible.
 175     let original_snippet = context.snippet(span);
 176     let prefer_same_line = if let Some(pos) = original_snippet.chars().position(|c| c == '/') {
 177         !original_snippet[..pos].contains('\n')
 178     } else {
 179         !original_snippet.contains('\n')
 180     };
 181
 182     one_line_width -= first_sep.len();
 183     let first_sep = if prev_str.is_empty() || missing_comment.is_empty() {
 184         String::new()
 185     } else {
 186         let one_line_width = last_line_width(prev_str) + first_line_width(&missing_comment) + 1;
 187         if prefer_same_line && one_line_width <= shape.width {
 188             String::from(" ")
 189         } else {
 190             format!("\n{}", indent_str)
 191         }
 192     };
 193     let second_sep = if missing_comment.is_empty() || next_str.is_empty() {
 194         String::new()
 195     } else if missing_comment.starts_with("//") {
 196         format!("\n{}", indent_str)
 197     } else {
 198         one_line_width += missing_comment.len() + first_sep.len() + 1;
 199         allow_one_line &= !missing_comment.starts_with("//") && !missing_comment.contains('\n');
 200         if prefer_same_line && allow_one_line && one_line_width <= shape.width {
 201             String::from(" ")
 202         } else {
 203             format!("\n{}", indent_str)
 204         }
 205     };
 206     Some(format!(
 207         "{}{}{}{}{}",
 208         prev_str, first_sep, missing_comment, second_sep, next_str,
 209     ))
 210 }
 211
 212 pub fn rewrite_comment(
 213     orig: &str,
 214     block_style: bool,
 215     shape: Shape,
 216     config: &Config,
 217 ) -> Option<String> {
 218     // If there are lines without a starting sigil, we won't format them correctly
 219     // so in that case we won't even re-align (if !config.normalize_comments()) and
 220     // we should stop now.
 221     let num_bare_lines = orig.lines()
 222         .map(|line| line.trim())
 223         .filter(|l| !(l.starts_with('*') || l.starts_with("//") || l.starts_with("/*")))
 224         .count();
 225     if num_bare_lines > 0 && !config.normalize_comments() {
 226         return Some(orig.to_owned());
 227     }
 228     if !config.normalize_comments() && !config.wrap_comments() {
 229         return light_rewrite_comment(orig, shape.indent, config);
 230     }
 231
 232     identify_comment(orig, block_style, shape, config)
 233 }
 234
 235 fn identify_comment(
 236     orig: &str,
 237     block_style: bool,
 238     shape: Shape,
 239     config: &Config,
 240 ) -> Option<String> {
 241     let style = comment_style(orig, false);
 242     let first_group = orig.lines()
 243         .take_while(|l| style.line_with_same_comment_style(l, false))
 244         .collect::<Vec<_>>()
 245         .join("\n");
 246     let rest = orig.lines()
 247         .skip(first_group.lines().count())
 248         .collect::<Vec<_>>()
 249         .join("\n");
 250
 251     let first_group_str = rewrite_comment_inner(&first_group, block_style, style, shape, config)?;
 252     if rest.is_empty() {
 253         Some(first_group_str)
 254     } else {
 255         identify_comment(&rest, block_style, shape, config).map(|rest_str| {
 256             format!(
 257                 "{}\n{}{}",
 258                 first_group_str,
 259                 shape.indent.to_string(config),
 260                 rest_str
 261             )
 262         })
 263     }
 264 }
 265
 266 fn rewrite_comment_inner(
 267     orig: &str,
 268     block_style: bool,
 269     style: CommentStyle,
 270     shape: Shape,
 271     config: &Config,
 272 ) -> Option<String> {
 273     let (opener, closer, line_start) = if block_style {
 274         CommentStyle::SingleBullet.to_str_tuplet()
 275     } else {
 276         comment_style(orig, config.normalize_comments()).to_str_tuplet()
 277     };
 278
 279     let max_chars = shape
 280         .width
 281         .checked_sub(closer.len() + opener.len())
 282         .unwrap_or(1);
 283     let indent_str = shape.indent.to_string(config);
 284     let fmt_indent = shape.indent + (opener.len() - line_start.len());
 285     let mut fmt = StringFormat {
 286         opener: "",
 287         closer: "",
 288         line_start: line_start,
 289         line_end: "",
 290         shape: Shape::legacy(max_chars, fmt_indent),
 291         trim_end: true,
 292         config: config,
 293     };
 294
 295     let line_breaks = count_newlines(orig.trim_right());
 296     let lines = orig.lines()
 297         .enumerate()
 298         .map(|(i, mut line)| {
 299             line = line.trim();
 300             // Drop old closer.
 301             if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
 302                 line = line[..(line.len() - 2)].trim_right();
 303             }
 304
 305             line
 306         })
 307         .map(|s| left_trim_comment_line(s, &style))
 308         .map(|(line, has_leading_whitespace)| {
 309             if orig.starts_with("/*") && line_breaks == 0 {
 310                 (
 311                     line.trim_left(),
 312                     has_leading_whitespace || config.normalize_comments(),
 313                 )
 314             } else {
 315                 (line, has_leading_whitespace || config.normalize_comments())
 316             }
 317         });
 318
 319     let mut result = String::with_capacity(orig.len() * 2);
 320     result.push_str(opener);
 321     let mut is_prev_line_multi_line = false;
 322     let mut inside_code_block = false;
 323     let comment_line_separator = format!("\n{}{}", indent_str, line_start);
 324     for (i, (line, has_leading_whitespace)) in lines.enumerate() {
 325         let is_last = i == count_newlines(orig);
 326         if result == opener {
 327             let force_leading_whitespace = opener == "/* " && count_newlines(orig) == 0;
 328             if !has_leading_whitespace && !force_leading_whitespace && result.ends_with(' ') {
 329                 result.pop();
 330             }
 331             if line.is_empty() {
 332                 continue;
 333             }
 334         } else if is_prev_line_multi_line && !line.is_empty() {
 335             result.push(' ')
 336         } else if is_last && !closer.is_empty() && line.is_empty() {
 337             result.push('\n');
 338             result.push_str(&indent_str);
 339         } else {
 340             result.push_str(&comment_line_separator);
 341             if !has_leading_whitespace && result.ends_with(' ') {
 342                 result.pop();
 343             }
 344         }
 345
 346         if line.starts_with("```") {
 347             inside_code_block = !inside_code_block;
 348         }
 349         if inside_code_block {
 350             if line.is_empty() && result.ends_with(' ') {
 351                 result.pop();
 352             } else {
 353                 result.push_str(line);
 354             }
 355             continue;
 356         }
 357
 358         if config.wrap_comments() && line.len() > fmt.shape.width && !has_url(line) {
 359             match rewrite_string(line, &fmt, Some(max_chars)) {
 360                 Some(ref s) => {
 361                     is_prev_line_multi_line = s.contains('\n');
 362                     result.push_str(s);
 363                 }
 364                 None if is_prev_line_multi_line => {
 365                     // We failed to put the current `line` next to the previous `line`.
 366                     // Remove the trailing space, then start rewrite on the next line.
 367                     result.pop();
 368                     result.push_str(&comment_line_separator);
 369                     fmt.shape = Shape::legacy(max_chars, fmt_indent);
 370                     match rewrite_string(line, &fmt, Some(max_chars)) {
 371                         Some(ref s) => {
 372                             is_prev_line_multi_line = s.contains('\n');
 373                             result.push_str(s);
 374                         }
 375                         None => {
 376                             is_prev_line_multi_line = false;
 377                             result.push_str(line);
 378                         }
 379                     }
 380                 }
 381                 None => {
 382                     is_prev_line_multi_line = false;
 383                     result.push_str(line);
 384                 }
 385             }
 386
 387             fmt.shape = if is_prev_line_multi_line {
 388                 // 1 = " "
 389                 let offset = 1 + last_line_width(&result) - line_start.len();
 390                 Shape {
 391                     width: max_chars.checked_sub(offset).unwrap_or(0),
 392                     indent: fmt_indent,
 393                     offset: fmt.shape.offset + offset,
 394                 }
 395             } else {
 396                 Shape::legacy(max_chars, fmt_indent)
 397             };
 398         } else {
 399             if line.is_empty() && result.ends_with(' ') && !is_last {
 400                 // Remove space if this is an empty comment or a doc comment.
 401                 result.pop();
 402             }
 403             result.push_str(line);
 404             fmt.shape = Shape::legacy(max_chars, fmt_indent);
 405             is_prev_line_multi_line = false;
 406         }
 407     }
 408
 409     result.push_str(closer);
 410     if result == opener && result.ends_with(' ') {
 411         // Trailing space.
 412         result.pop();
 413     }
 414
 415     Some(result)
 416 }
 417
 418 /// Returns true if the given string MAY include URLs or alike.
 419 fn has_url(s: &str) -> bool {
 420     // This function may return false positive, but should get its job done in most cases.
 421     s.contains("https://") || s.contains("http://") || s.contains("ftp://") || s.contains("file://")
 422 }
 423
 424 /// Given the span, rewrite the missing comment inside it if available.
 425 /// Note that the given span must only include comments (or leading/trailing whitespaces).
 426 pub fn rewrite_missing_comment(
 427     span: Span,
 428     shape: Shape,
 429     context: &RewriteContext,
 430 ) -> Option<String> {
 431     let missing_snippet = context.snippet(span);
 432     let trimmed_snippet = missing_snippet.trim();
 433     if !trimmed_snippet.is_empty() {
 434         rewrite_comment(trimmed_snippet, false, shape, context.config)
 435     } else {
 436         Some(String::new())
 437     }
 438 }
 439
 440 /// Recover the missing comments in the specified span, if available.
 441 /// The layout of the comments will be preserved as long as it does not break the code
 442 /// and its total width does not exceed the max width.
 443 pub fn recover_missing_comment_in_span(
 444     span: Span,
 445     shape: Shape,
 446     context: &RewriteContext,
 447     used_width: usize,
 448 ) -> Option<String> {
 449     let missing_comment = rewrite_missing_comment(span, shape, context)?;
 450     if missing_comment.is_empty() {
 451         Some(String::new())
 452     } else {
 453         let missing_snippet = context.snippet(span);
 454         let pos = missing_snippet.chars().position(|c| c == '/').unwrap_or(0);
 455         // 1 = ` `
 456         let total_width = missing_comment.len() + used_width + 1;
 457         let force_new_line_before_comment =
 458             missing_snippet[..pos].contains('\n') || total_width > context.config.max_width();
 459         let sep = if force_new_line_before_comment {
 460             format!("\n{}", shape.indent.to_string(context.config))
 461         } else {
 462             String::from(" ")
 463         };
 464         Some(format!("{}{}", sep, missing_comment))
 465     }
 466 }
 467
 468 /// Trims whitespace and aligns to indent, but otherwise does not change comments.
 469 fn light_rewrite_comment(orig: &str, offset: Indent, config: &Config) -> Option<String> {
 470     let lines: Vec<&str> = orig.lines()
 471         .map(|l| {
 472             // This is basically just l.trim(), but in the case that a line starts
 473             // with `*` we want to leave one space before it, so it aligns with the
 474             // `*` in `/*`.
 475             let first_non_whitespace = l.find(|c| !char::is_whitespace(c));
 476             if let Some(fnw) = first_non_whitespace {
 477                 if l.as_bytes()[fnw] == b'*' && fnw > 0 {
 478                     &l[fnw - 1..]
 479                 } else {
 480                     &l[fnw..]
 481                 }
 482             } else {
 483                 ""
 484             }.trim_right()
 485         })
 486         .collect();
 487     Some(lines.join(&format!("\n{}", offset.to_string(config))))
 488 }
 489
 490 /// Trims comment characters and possibly a single space from the left of a string.
 491 /// Does not trim all whitespace. If a single space is trimmed from the left of the string,
 492 /// this function returns true.
 493 fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle) -> (&'a str, bool) {
 494     if line.starts_with("//! ") || line.starts_with("/// ") || line.starts_with("/*! ")
 495         || line.starts_with("/** ")
 496     {
 497         (&line[4..], true)
 498     } else if let CommentStyle::Custom(opener) = *style {
 499         if line.starts_with(opener) {
 500             (&line[opener.len()..], true)
 501         } else {
 502             (&line[opener.trim_right().len()..], false)
 503         }
 504     } else if line.starts_with("/* ") || line.starts_with("// ") || line.starts_with("//!")
 505         || line.starts_with("///") || line.starts_with("** ")
 506         || line.starts_with("/*!")
 507         || (line.starts_with("/**") && !line.starts_with("/**/"))
 508     {
 509         (&line[3..], line.chars().nth(2).unwrap() == ' ')
 510     } else if line.starts_with("/*") || line.starts_with("* ") || line.starts_with("//")
 511         || line.starts_with("**")
 512     {
 513         (&line[2..], line.chars().nth(1).unwrap() == ' ')
 514     } else if line.starts_with('*') {
 515         (&line[1..], false)
 516     } else {
 517         (line, line.starts_with(' '))
 518     }
 519 }
 520
 521 pub trait FindUncommented {
 522     fn find_uncommented(&self, pat: &str) -> Option<usize>;
 523 }
 524
 525 impl FindUncommented for str {
 526     fn find_uncommented(&self, pat: &str) -> Option<usize> {
 527         let mut needle_iter = pat.chars();
 528         for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
 529             match needle_iter.next() {
 530                 None => {
 531                     return Some(i - pat.len());
 532                 }
 533                 Some(c) => match kind {
 534                     FullCodeCharKind::Normal | FullCodeCharKind::InString if b == c => {}
 535                     _ => {
 536                         needle_iter = pat.chars();
 537                     }
 538                 },
 539             }
 540         }
 541
 542         // Handle case where the pattern is a suffix of the search string
 543         match needle_iter.next() {
 544             Some(_) => None,
 545             None => Some(self.len() - pat.len()),
 546         }
 547     }
 548 }
 549
 550 // Returns the first byte position after the first comment. The given string
 551 // is expected to be prefixed by a comment, including delimiters.
 552 // Good: "/* /* inner */ outer */ code();"
 553 // Bad:  "code(); // hello\n world!"
 554 pub fn find_comment_end(s: &str) -> Option<usize> {
 555     let mut iter = CharClasses::new(s.char_indices());
 556     for (kind, (i, _c)) in &mut iter {
 557         if kind == FullCodeCharKind::Normal || kind == FullCodeCharKind::InString {
 558             return Some(i);
 559         }
 560     }
 561
 562     // Handle case where the comment ends at the end of s.
 563     if iter.status == CharClassesStatus::Normal {
 564         Some(s.len())
 565     } else {
 566         None
 567     }
 568 }
 569
 570 /// Returns true if text contains any comment.
 571 pub fn contains_comment(text: &str) -> bool {
 572     CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
 573 }
 574
 575 /// Remove trailing spaces from the specified snippet. We do not remove spaces
 576 /// inside strings or comments.
 577 pub fn remove_trailing_white_spaces(text: &str) -> String {
 578     let mut buffer = String::with_capacity(text.len());
 579     let mut space_buffer = String::with_capacity(128);
 580     for (char_kind, c) in CharClasses::new(text.chars()) {
 581         match c {
 582             '\n' => {
 583                 if char_kind == FullCodeCharKind::InString {
 584                     buffer.push_str(&space_buffer);
 585                 }
 586                 space_buffer.clear();
 587                 buffer.push('\n');
 588             }
 589             _ if c.is_whitespace() => {
 590                 space_buffer.push(c);
 591             }
 592             _ => {
 593                 if !space_buffer.is_empty() {
 594                     buffer.push_str(&space_buffer);
 595                     space_buffer.clear();
 596                 }
 597                 buffer.push(c);
 598             }
 599         }
 600     }
 601     buffer
 602 }
 603
 604 pub struct CharClasses<T>
 605 where
 606     T: Iterator,
 607     T::Item: RichChar,
 608 {
 609     base: iter::Peekable<T>,
 610     status: CharClassesStatus,
 611 }
 612
 613 pub trait RichChar {
 614     fn get_char(&self) -> char;
 615 }
 616
 617 impl RichChar for char {
 618     fn get_char(&self) -> char {
 619         *self
 620     }
 621 }
 622
 623 impl RichChar for (usize, char) {
 624     fn get_char(&self) -> char {
 625         self.1
 626     }
 627 }
 628
 629 impl RichChar for (char, usize) {
 630     fn get_char(&self) -> char {
 631         self.0
 632     }
 633 }
 634
 635 #[derive(PartialEq, Eq, Debug, Clone, Copy)]
 636 enum CharClassesStatus {
 637     Normal,
 638     LitString,
 639     LitStringEscape,
 640     LitChar,
 641     LitCharEscape,
 642     // The u32 is the nesting deepness of the comment
 643     BlockComment(u32),
 644     // Status when the '/' has been consumed, but not yet the '*', deepness is
 645     // the new deepness (after the comment opening).
 646     BlockCommentOpening(u32),
 647     // Status when the '*' has been consumed, but not yet the '/', deepness is
 648     // the new deepness (after the comment closing).
 649     BlockCommentClosing(u32),
 650     LineComment,
 651 }
 652
 653 /// Distinguish between functional part of code and comments
 654 #[derive(PartialEq, Eq, Debug, Clone, Copy)]
 655 pub enum CodeCharKind {
 656     Normal,
 657     Comment,
 658 }
 659
 660 /// Distinguish between functional part of code and comments,
 661 /// describing opening and closing of comments for ease when chunking
 662 /// code from tagged characters
 663 #[derive(PartialEq, Eq, Debug, Clone, Copy)]
 664 pub enum FullCodeCharKind {
 665     Normal,
 666     /// The first character of a comment, there is only one for a comment (always '/')
 667     StartComment,
 668     /// Any character inside a comment including the second character of comment
 669     /// marks ("//", "/*")
 670     InComment,
 671     /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
 672     EndComment,
 673     /// Inside a string.
 674     InString,
 675 }
 676
 677 impl FullCodeCharKind {
 678     pub fn is_comment(&self) -> bool {
 679         match *self {
 680             FullCodeCharKind::StartComment
 681             | FullCodeCharKind::InComment
 682             | FullCodeCharKind::EndComment => true,
 683             _ => false,
 684         }
 685     }
 686
 687     pub fn is_string(&self) -> bool {
 688         *self == FullCodeCharKind::InString
 689     }
 690
 691     fn to_codecharkind(&self) -> CodeCharKind {
 692         if self.is_comment() {
 693             CodeCharKind::Comment
 694         } else {
 695             CodeCharKind::Normal
 696         }
 697     }
 698 }
 699
 700 impl<T> CharClasses<T>
 701 where
 702     T: Iterator,
 703     T::Item: RichChar,
 704 {
 705     pub fn new(base: T) -> CharClasses<T> {
 706         CharClasses {
 707             base: base.peekable(),
 708             status: CharClassesStatus::Normal,
 709         }
 710     }
 711 }
 712
 713 impl<T> Iterator for CharClasses<T>
 714 where
 715     T: Iterator,
 716     T::Item: RichChar,
 717 {
 718     type Item = (FullCodeCharKind, T::Item);
 719
 720     fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
 721         let item = self.base.next()?;
 722         let chr = item.get_char();
 723         let mut char_kind = FullCodeCharKind::Normal;
 724         self.status = match self.status {
 725             CharClassesStatus::LitString => match chr {
 726                 '"' => CharClassesStatus::Normal,
 727                 '\\' => {
 728                     char_kind = FullCodeCharKind::InString;
 729                     CharClassesStatus::LitStringEscape
 730                 }
 731                 _ => {
 732                     char_kind = FullCodeCharKind::InString;
 733                     CharClassesStatus::LitString
 734                 }
 735             },
 736             CharClassesStatus::LitStringEscape => {
 737                 char_kind = FullCodeCharKind::InString;
 738                 CharClassesStatus::LitString
 739             }
 740             CharClassesStatus::LitChar => match chr {
 741                 '\\' => CharClassesStatus::LitCharEscape,
 742                 '\'' => CharClassesStatus::Normal,
 743                 _ => CharClassesStatus::LitChar,
 744             },
 745             CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
 746             CharClassesStatus::Normal => match chr {
 747                 '"' => {
 748                     char_kind = FullCodeCharKind::InString;
 749                     CharClassesStatus::LitString
 750                 }
 751                 '\'' => CharClassesStatus::LitChar,
 752                 '/' => match self.base.peek() {
 753                     Some(next) if next.get_char() == '*' => {
 754                         self.status = CharClassesStatus::BlockCommentOpening(1);
 755                         return Some((FullCodeCharKind::StartComment, item));
 756                     }
 757                     Some(next) if next.get_char() == '/' => {
 758                         self.status = CharClassesStatus::LineComment;
 759                         return Some((FullCodeCharKind::StartComment, item));
 760                     }
 761                     _ => CharClassesStatus::Normal,
 762                 },
 763                 _ => CharClassesStatus::Normal,
 764             },
 765             CharClassesStatus::BlockComment(deepness) => {
 766                 assert_ne!(deepness, 0);
 767                 self.status = match self.base.peek() {
 768                     Some(next) if next.get_char() == '/' && chr == '*' => {
 769                         CharClassesStatus::BlockCommentClosing(deepness - 1)
 770                     }
 771                     Some(next) if next.get_char() == '*' && chr == '/' => {
 772                         CharClassesStatus::BlockCommentOpening(deepness + 1)
 773                     }
 774                     _ => CharClassesStatus::BlockComment(deepness),
 775                 };
 776                 return Some((FullCodeCharKind::InComment, item));
 777             }
 778             CharClassesStatus::BlockCommentOpening(deepness) => {
 779                 assert_eq!(chr, '*');
 780                 self.status = CharClassesStatus::BlockComment(deepness);
 781                 return Some((FullCodeCharKind::InComment, item));
 782             }
 783             CharClassesStatus::BlockCommentClosing(deepness) => {
 784                 assert_eq!(chr, '/');
 785                 if deepness == 0 {
 786                     self.status = CharClassesStatus::Normal;
 787                     return Some((FullCodeCharKind::EndComment, item));
 788                 } else {
 789                     self.status = CharClassesStatus::BlockComment(deepness);
 790                     return Some((FullCodeCharKind::InComment, item));
 791                 }
 792             }
 793             CharClassesStatus::LineComment => match chr {
 794                 '\n' => {
 795                     self.status = CharClassesStatus::Normal;
 796                     return Some((FullCodeCharKind::EndComment, item));
 797                 }
 798                 _ => {
 799                     self.status = CharClassesStatus::LineComment;
 800                     return Some((FullCodeCharKind::InComment, item));
 801                 }
 802             },
 803         };
 804         Some((char_kind, item))
 805     }
 806 }
 807
 808 /// Iterator over functional and commented parts of a string. Any part of a string is either
 809 /// functional code, either *one* block comment, either *one* line comment. Whitespace between
 810 /// comments is functional code. Line comments contain their ending newlines.
 811 struct UngroupedCommentCodeSlices<'a> {
 812     slice: &'a str,
 813     iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
 814 }
 815
 816 impl<'a> UngroupedCommentCodeSlices<'a> {
 817     fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
 818         UngroupedCommentCodeSlices {
 819             slice: code,
 820             iter: CharClasses::new(code.char_indices()).peekable(),
 821         }
 822     }
 823 }
 824
 825 impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
 826     type Item = (CodeCharKind, usize, &'a str);
 827
 828     fn next(&mut self) -> Option<Self::Item> {
 829         let (kind, (start_idx, _)) = self.iter.next()?;
 830         match kind {
 831             FullCodeCharKind::Normal | FullCodeCharKind::InString => {
 832                 // Consume all the Normal code
 833                 while let Some(&(char_kind, _)) = self.iter.peek() {
 834                     if char_kind.is_comment() {
 835                         break;
 836                     }
 837                     let _ = self.iter.next();
 838                 }
 839             }
 840             FullCodeCharKind::StartComment => {
 841                 // Consume the whole comment
 842                 while let Some((FullCodeCharKind::InComment, (_, _))) = self.iter.next() {}
 843             }
 844             _ => panic!(),
 845         }
 846         let slice = match self.iter.peek() {
 847             Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
 848             None => &self.slice[start_idx..],
 849         };
 850         Some((
 851             if kind.is_comment() {
 852                 CodeCharKind::Comment
 853             } else {
 854                 CodeCharKind::Normal
 855             },
 856             start_idx,
 857             slice,
 858         ))
 859     }
 860 }
 861
 862 /// Iterator over an alternating sequence of functional and commented parts of
 863 /// a string. The first item is always a, possibly zero length, subslice of
 864 /// functional text. Line style comments contain their ending newlines.
 865 pub struct CommentCodeSlices<'a> {
 866     slice: &'a str,
 867     last_slice_kind: CodeCharKind,
 868     last_slice_end: usize,
 869 }
 870
 871 impl<'a> CommentCodeSlices<'a> {
 872     pub fn new(slice: &'a str) -> CommentCodeSlices<'a> {
 873         CommentCodeSlices {
 874             slice: slice,
 875             last_slice_kind: CodeCharKind::Comment,
 876             last_slice_end: 0,
 877         }
 878     }
 879 }
 880
 881 impl<'a> Iterator for CommentCodeSlices<'a> {
 882     type Item = (CodeCharKind, usize, &'a str);
 883
 884     fn next(&mut self) -> Option<Self::Item> {
 885         if self.last_slice_end == self.slice.len() {
 886             return None;
 887         }
 888
 889         let mut sub_slice_end = self.last_slice_end;
 890         let mut first_whitespace = None;
 891         let subslice = &self.slice[self.last_slice_end..];
 892         let mut iter = CharClasses::new(subslice.char_indices());
 893
 894         for (kind, (i, c)) in &mut iter {
 895             let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal
 896                 && &subslice[..2] == "//"
 897                 && [' ', '\t'].contains(&c);
 898
 899             if is_comment_connector && first_whitespace.is_none() {
 900                 first_whitespace = Some(i);
 901             }
 902
 903             if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
 904                 let last_index = match first_whitespace {
 905                     Some(j) => j,
 906                     None => i,
 907                 };
 908                 sub_slice_end = self.last_slice_end + last_index;
 909                 break;
 910             }
 911
 912             if !is_comment_connector {
 913                 first_whitespace = None;
 914             }
 915         }
 916
 917         if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
 918             // This was the last subslice.
 919             sub_slice_end = match first_whitespace {
 920                 Some(i) => self.last_slice_end + i,
 921                 None => self.slice.len(),
 922             };
 923         }
 924
 925         let kind = match self.last_slice_kind {
 926             CodeCharKind::Comment => CodeCharKind::Normal,
 927             CodeCharKind::Normal => CodeCharKind::Comment,
 928         };
 929         let res = (
 930             kind,
 931             self.last_slice_end,
 932             &self.slice[self.last_slice_end..sub_slice_end],
 933         );
 934         self.last_slice_end = sub_slice_end;
 935         self.last_slice_kind = kind;
 936
 937         Some(res)
 938     }
 939 }
 940
 941 /// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
 942 /// (if it fits in the width/offset, else return None), else return `new`
 943 pub fn recover_comment_removed(
 944     new: String,
 945     span: Span,
 946     context: &RewriteContext,
 947 ) -> Option<String> {
 948     let snippet = context.snippet(span);
 949     if snippet != new && changed_comment_content(snippet, &new) {
 950         // We missed some comments. Keep the original text.
 951         Some(snippet.to_owned())
 952     } else {
 953         Some(new)
 954     }
 955 }
 956
 957 /// Return true if the two strings of code have the same payload of comments.
 958 /// The payload of comments is everything in the string except:
 959 ///     - actual code (not comments)
 960 ///     - comment start/end marks
 961 ///     - whitespace
 962 ///     - '*' at the beginning of lines in block comments
 963 fn changed_comment_content(orig: &str, new: &str) -> bool {
 964     // Cannot write this as a fn since we cannot return types containing closures
 965     let code_comment_content = |code| {
 966         let slices = UngroupedCommentCodeSlices::new(code);
 967         slices
 968             .filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
 969             .flat_map(|(_, _, s)| CommentReducer::new(s))
 970     };
 971     let res = code_comment_content(orig).ne(code_comment_content(new));
 972     debug!(
 973         "comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
 974         res,
 975         orig,
 976         new,
 977         code_comment_content(orig).collect::<String>(),
 978         code_comment_content(new).collect::<String>()
 979     );
 980     res
 981 }
 982
 983 /// Iterator over the 'payload' characters of a comment.
 984 /// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
 985 /// The comment must be one comment, ie not more than one start mark (no multiple line comments,
 986 /// for example).
 987 struct CommentReducer<'a> {
 988     is_block: bool,
 989     at_start_line: bool,
 990     iter: std::str::Chars<'a>,
 991 }
 992
 993 impl<'a> CommentReducer<'a> {
 994     fn new(comment: &'a str) -> CommentReducer<'a> {
 995         let is_block = comment.starts_with("/*");
 996         let comment = remove_comment_header(comment);
 997         CommentReducer {
 998             is_block: is_block,
 999             at_start_line: false, // There are no supplementary '*' on the first line
1000             iter: comment.chars(),
1001         }
1002     }
1003 }
1004
1005 impl<'a> Iterator for CommentReducer<'a> {
1006     type Item = char;
1007     fn next(&mut self) -> Option<Self::Item> {
1008         loop {
1009             let mut c = self.iter.next()?;
1010             if self.is_block && self.at_start_line {
1011                 while c.is_whitespace() {
1012                     c = self.iter.next()?;
1013                 }
1014                 // Ignore leading '*'
1015                 if c == '*' {
1016                     c = self.iter.next()?;
1017                 }
1018             } else if c == '\n' {
1019                 self.at_start_line = true;
1020             }
1021             if !c.is_whitespace() {
1022                 return Some(c);
1023             }
1024         }
1025     }
1026 }
1027
1028 fn remove_comment_header(comment: &str) -> &str {
1029     if comment.starts_with("///") || comment.starts_with("//!") {
1030         &comment[3..]
1031     } else if comment.starts_with("//") {
1032         &comment[2..]
1033     } else if (comment.starts_with("/**") && !comment.starts_with("/**/"))
1034         || comment.starts_with("/*!")
1035     {
1036         &comment[3..comment.len() - 2]
1037     } else {
1038         assert!(
1039             comment.starts_with("/*"),
1040             format!("string '{}' is not a comment", comment)
1041         );
1042         &comment[2..comment.len() - 2]
1043     }
1044 }
1045
#[cfg(test)]
mod test {
    use super::{contains_comment, rewrite_comment, CharClasses, CodeCharKind, CommentCodeSlices,
                FindUncommented, FullCodeCharKind};
    use shape::{Indent, Shape};

    // A line comment followed by an empty line, classified character by character.
    #[test]
    fn char_classes() {
        let classified: Vec<_> = CharClasses::new("//\n\n".chars()).collect();
        let expected = vec![
            (FullCodeCharKind::StartComment, '/'),
            (FullCodeCharKind::InComment, '/'),
            (FullCodeCharKind::EndComment, '\n'),
            (FullCodeCharKind::Normal, '\n'),
        ];

        assert_eq!(expected, classified);
    }

    // A block comment in the middle of code.
    #[test]
    fn comment_code_slices() {
        let slices: Vec<_> = CommentCodeSlices::new("code(); /* test */ 1 + 1").collect();
        let expected = vec![
            (CodeCharKind::Normal, 0, "code(); "),
            (CodeCharKind::Comment, 8, "/* test */"),
            (CodeCharKind::Normal, 18, " 1 + 1"),
        ];

        assert_eq!(expected, slices);
    }

    // Input starting with a comment: the leading code slice is empty.
    #[test]
    fn comment_code_slices_two() {
        let slices: Vec<_> = CommentCodeSlices::new("// comment\n    test();").collect();
        let expected = vec![
            (CodeCharKind::Normal, 0, ""),
            (CodeCharKind::Comment, 0, "// comment\n"),
            (CodeCharKind::Normal, 11, "    test();"),
        ];

        assert_eq!(expected, slices);
    }

    // Consecutive line comments separated only by indentation form a single slice.
    #[test]
    fn comment_code_slices_three() {
        let slices: Vec<_> =
            CommentCodeSlices::new("1 // comment\n    // comment2\n\n").collect();
        let expected = vec![
            (CodeCharKind::Normal, 0, "1 "),
            (CodeCharKind::Comment, 2, "// comment\n    // comment2\n"),
            (CodeCharKind::Normal, 29, "\n"),
        ];

        assert_eq!(expected, slices);
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn format_comments() {
        let mut config: ::config::Config = Default::default();
        config.set().wrap_comments(true);
        config.set().normalize_comments(true);

        // Rewrites `text` with the configuration above and expects the rewrite to succeed.
        let rewrite = |text: &str, block_style: bool, shape: Shape| {
            rewrite_comment(text, block_style, shape, &config).unwrap()
        };

        assert_eq!("/* test */",
                   rewrite(" //test", true, Shape::legacy(100, Indent::new(0, 100))));

        assert_eq!("// comment\n// on a",
                   rewrite("// comment on a", false, Shape::legacy(10, Indent::empty())));

        assert_eq!("//  A multi line comment\n            // between args.",
                   rewrite("//  A multi line comment\n             // between args.",
                           false,
                           Shape::legacy(60, Indent::new(0, 12))));

        assert_eq!("/* comment */",
                   rewrite("// comment", true, Shape::legacy(9, Indent::new(0, 69))));

        assert_eq!("/* trimmed */",
                   rewrite("/*   trimmed    */", true, Shape::legacy(100, Indent::new(0, 100))));
    }

    // Keeps only the characters that are not part of a comment. This looks like it was
    // meant to be a non-test fn, but nothing outside of the tests uses it, so it stays
    // here as a test helper.
    fn uncommented(text: &str) -> String {
        CharClasses::new(text.chars())
            .filter(|&(kind, _)| match kind {
                FullCodeCharKind::Normal | FullCodeCharKind::InString => true,
                _ => false,
            })
            .map(|(_, c)| c)
            .collect()
    }

    #[test]
    fn test_uncommented() {
        assert_eq!(&uncommented("abc/*...*/"), "abc");
        assert_eq!(
            &uncommented("// .... /* \n../* /* *** / */ */a/* // */c\n"),
            "..ac\n"
        );
        assert_eq!(&uncommented("abc \" /* */\" qsdf"), "abc \" /* */\" qsdf");
    }

    #[test]
    fn test_contains_comment() {
        assert!(!contains_comment("abc"));
        assert!(contains_comment("abc // qsdf"));
        assert!(contains_comment("abc /* kqsdf"));
        assert!(!contains_comment("abc \" /* */\" qsdf"));
    }

    #[test]
    fn test_find_uncommented() {
        // (haystack, needle, expected byte offset of the first uncommented match)
        let cases: &[(&str, &str, Option<usize>)] = &[
            ("/*/ */test", "test", Some(6)),
            ("//test\ntest", "test", Some(7)),
            ("/* comment only */", "whatever", None),
            (
                "/* comment */ some text /* more commentary */ result",
                "result",
                Some(46),
            ),
            ("sup // sup", "p", Some(2)),
            ("sup", "x", None),
            (r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9)),
            ("/*sup yo? \n sup*/ sup", "p", Some(20)),
            ("hel/*lohello*/lo", "hello", None),
            ("acb", "ab", None),
            (",/*A*/ ", ",", Some(0)),
            ("abc", "abc", Some(0)),
            ("/* abc */", "abc", None),
            ("/**/abc/* */", "abc", Some(4)),
            ("\"/* abc */\"", "abc", Some(4)),
            ("\"/* abc", "abc", Some(4)),
        ];

        for &(haystack, needle, expected) in cases {
            assert_eq!(expected, haystack.find_uncommented(needle));
        }
    }
}