src/comment.rs

   1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 // Formatting and tools for comments.
  12
  13 use std::{self, iter, borrow::Cow};
  14
  15 use syntax::codemap::Span;
  16
  17 use config::Config;
  18 use rewrite::RewriteContext;
  19 use shape::{Indent, Shape};
  20 use string::{rewrite_string, StringFormat};
  21 use utils::{count_newlines, first_line_width, last_line_width};
  22
  23 fn is_custom_comment(comment: &str) -> bool {
  24     if !comment.starts_with("//") {
  25         false
  26     } else if let Some(c) = comment.chars().nth(2) {
  27         !c.is_alphanumeric() && !c.is_whitespace()
  28     } else {
  29         false
  30     }
  31 }
  32
  33 #[derive(Copy, Clone, PartialEq, Eq)]
  34 pub enum CommentStyle<'a> {
  35     DoubleSlash,
  36     TripleSlash,
  37     Doc,
  38     SingleBullet,
  39     DoubleBullet,
  40     Exclamation,
  41     Custom(&'a str),
  42 }
  43
  44 fn custom_opener(s: &str) -> &str {
  45     s.lines().next().map_or("", |first_line| {
  46         first_line
  47             .find(' ')
  48             .map_or(first_line, |space_index| &first_line[0..space_index + 1])
  49     })
  50 }
  51
  52 impl<'a> CommentStyle<'a> {
  53     pub fn is_doc_comment(&self) -> bool {
  54         match *self {
  55             CommentStyle::TripleSlash | CommentStyle::Doc => true,
  56             _ => false,
  57         }
  58     }
  59
  60     pub fn opener(&self) -> &'a str {
  61         match *self {
  62             CommentStyle::DoubleSlash => "// ",
  63             CommentStyle::TripleSlash => "/// ",
  64             CommentStyle::Doc => "//! ",
  65             CommentStyle::SingleBullet => "/* ",
  66             CommentStyle::DoubleBullet => "/** ",
  67             CommentStyle::Exclamation => "/*! ",
  68             CommentStyle::Custom(opener) => opener,
  69         }
  70     }
  71
  72     pub fn closer(&self) -> &'a str {
  73         match *self {
  74             CommentStyle::DoubleSlash
  75             | CommentStyle::TripleSlash
  76             | CommentStyle::Custom(..)
  77             | CommentStyle::Doc => "",
  78             CommentStyle::DoubleBullet => " **/",
  79             CommentStyle::SingleBullet | CommentStyle::Exclamation => " */",
  80         }
  81     }
  82
  83     pub fn line_start(&self) -> &'a str {
  84         match *self {
  85             CommentStyle::DoubleSlash => "// ",
  86             CommentStyle::TripleSlash => "/// ",
  87             CommentStyle::Doc => "//! ",
  88             CommentStyle::SingleBullet | CommentStyle::Exclamation => " * ",
  89             CommentStyle::DoubleBullet => " ** ",
  90             CommentStyle::Custom(opener) => opener,
  91         }
  92     }
  93
  94     pub fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
  95         (self.opener(), self.closer(), self.line_start())
  96     }
  97
  98     pub fn line_with_same_comment_style(&self, line: &str, normalize_comments: bool) -> bool {
  99         match *self {
 100             CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
 101                 line.trim_left().starts_with(self.line_start().trim_left())
 102                     || comment_style(line, normalize_comments) == *self
 103             }
 104             CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
 105                 line.trim_left().starts_with(self.closer().trim_left())
 106                     || line.trim_left().starts_with(self.line_start().trim_left())
 107                     || comment_style(line, normalize_comments) == *self
 108             }
 109             CommentStyle::Custom(opener) => line.trim_left().starts_with(opener.trim_right()),
 110         }
 111     }
 112 }
 113
 114 fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle {
 115     if !normalize_comments {
 116         if orig.starts_with("/**") && !orig.starts_with("/**/") {
 117             CommentStyle::DoubleBullet
 118         } else if orig.starts_with("/*!") {
 119             CommentStyle::Exclamation
 120         } else if orig.starts_with("/*") {
 121             CommentStyle::SingleBullet
 122         } else if orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/') {
 123             CommentStyle::TripleSlash
 124         } else if orig.starts_with("//!") {
 125             CommentStyle::Doc
 126         } else if is_custom_comment(orig) {
 127             CommentStyle::Custom(custom_opener(orig))
 128         } else {
 129             CommentStyle::DoubleSlash
 130         }
 131     } else if (orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/'))
 132         || (orig.starts_with("/**") && !orig.starts_with("/**/"))
 133     {
 134         CommentStyle::TripleSlash
 135     } else if orig.starts_with("//!") || orig.starts_with("/*!") {
 136         CommentStyle::Doc
 137     } else if is_custom_comment(orig) {
 138         CommentStyle::Custom(custom_opener(orig))
 139     } else {
 140         CommentStyle::DoubleSlash
 141     }
 142 }
 143
 144 /// Combine `prev_str` and `next_str` into a single `String`. `span` may contain
 145 /// comments between two strings. If there are such comments, then that will be
 146 /// recovered. If `allow_extend` is true and there is no comment between the two
 147 /// strings, then they will be put on a single line as long as doing so does not
 148 /// exceed max width.
 149 pub fn combine_strs_with_missing_comments(
 150     context: &RewriteContext,
 151     prev_str: &str,
 152     next_str: &str,
 153     span: Span,
 154     shape: Shape,
 155     allow_extend: bool,
 156 ) -> Option<String> {
 157     let mut result =
 158         String::with_capacity(prev_str.len() + next_str.len() + shape.indent.width() + 128);
 159     result.push_str(prev_str);
 160     let mut allow_one_line = !prev_str.contains('\n') && !next_str.contains('\n');
 161     let first_sep = if prev_str.is_empty() || next_str.is_empty() {
 162         ""
 163     } else {
 164         " "
 165     };
 166     let mut one_line_width =
 167         last_line_width(prev_str) + first_line_width(next_str) + first_sep.len();
 168
 169     let config = context.config;
 170     let indent = shape.indent;
 171     let missing_comment = rewrite_missing_comment(span, shape, context)?;
 172
 173     if missing_comment.is_empty() {
 174         if allow_extend && prev_str.len() + first_sep.len() + next_str.len() <= shape.width {
 175             result.push_str(first_sep);
 176         } else if !prev_str.is_empty() {
 177             result.push_str(&indent.to_string_with_newline(config))
 178         }
 179         result.push_str(next_str);
 180         return Some(result);
 181     }
 182
 183     // We have a missing comment between the first expression and the second expression.
 184
 185     // Peek the the original source code and find out whether there is a newline between the first
 186     // expression and the second expression or the missing comment. We will preserve the original
 187     // layout whenever possible.
 188     let original_snippet = context.snippet(span);
 189     let prefer_same_line = if let Some(pos) = original_snippet.chars().position(|c| c == '/') {
 190         !original_snippet[..pos].contains('\n')
 191     } else {
 192         !original_snippet.contains('\n')
 193     };
 194
 195     one_line_width -= first_sep.len();
 196     let first_sep = if prev_str.is_empty() || missing_comment.is_empty() {
 197         Cow::from("")
 198     } else {
 199         let one_line_width = last_line_width(prev_str) + first_line_width(&missing_comment) + 1;
 200         if prefer_same_line && one_line_width <= shape.width {
 201             Cow::from(" ")
 202         } else {
 203             indent.to_string_with_newline(config)
 204         }
 205     };
 206     result.push_str(&first_sep);
 207     result.push_str(&missing_comment);
 208
 209     let second_sep = if missing_comment.is_empty() || next_str.is_empty() {
 210         Cow::from("")
 211     } else if missing_comment.starts_with("//") {
 212         indent.to_string_with_newline(config)
 213     } else {
 214         one_line_width += missing_comment.len() + first_sep.len() + 1;
 215         allow_one_line &= !missing_comment.starts_with("//") && !missing_comment.contains('\n');
 216         if prefer_same_line && allow_one_line && one_line_width <= shape.width {
 217             Cow::from(" ")
 218         } else {
 219             indent.to_string_with_newline(config)
 220         }
 221     };
 222     result.push_str(&second_sep);
 223     result.push_str(next_str);
 224
 225     Some(result)
 226 }
 227
 228 pub fn rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> Option<String> {
 229     _rewrite_comment(orig, false, shape, config, true)
 230 }
 231
 232 pub fn rewrite_comment(
 233     orig: &str,
 234     block_style: bool,
 235     shape: Shape,
 236     config: &Config,
 237 ) -> Option<String> {
 238     _rewrite_comment(orig, block_style, shape, config, false)
 239 }
 240
 241 fn _rewrite_comment(
 242     orig: &str,
 243     block_style: bool,
 244     shape: Shape,
 245     config: &Config,
 246     is_doc_comment: bool,
 247 ) -> Option<String> {
 248     // If there are lines without a starting sigil, we won't format them correctly
 249     // so in that case we won't even re-align (if !config.normalize_comments()) and
 250     // we should stop now.
 251     let num_bare_lines = orig.lines()
 252         .map(|line| line.trim())
 253         .filter(|l| !(l.starts_with('*') || l.starts_with("//") || l.starts_with("/*")))
 254         .count();
 255     if num_bare_lines > 0 && !config.normalize_comments() {
 256         return Some(orig.to_owned());
 257     }
 258     if !config.normalize_comments() && !config.wrap_comments() {
 259         return light_rewrite_comment(orig, shape.indent, config, is_doc_comment);
 260     }
 261
 262     identify_comment(orig, block_style, shape, config, is_doc_comment)
 263 }
 264
 265 fn identify_comment(
 266     orig: &str,
 267     block_style: bool,
 268     shape: Shape,
 269     config: &Config,
 270     is_doc_comment: bool,
 271 ) -> Option<String> {
 272     let style = comment_style(orig, false);
 273     let first_group = orig.lines()
 274         .take_while(|l| style.line_with_same_comment_style(l, false))
 275         .collect::<Vec<_>>()
 276         .join("\n");
 277     let rest = orig.lines()
 278         .skip(first_group.lines().count())
 279         .collect::<Vec<_>>()
 280         .join("\n");
 281
 282     let first_group_str = rewrite_comment_inner(
 283         &first_group,
 284         block_style,
 285         style,
 286         shape,
 287         config,
 288         is_doc_comment || style.is_doc_comment(),
 289     )?;
 290     if rest.is_empty() {
 291         Some(first_group_str)
 292     } else {
 293         identify_comment(&rest, block_style, shape, config, is_doc_comment).map(|rest_str| {
 294             format!(
 295                 "{}\n{}{}",
 296                 first_group_str,
 297                 shape.indent.to_string(config),
 298                 rest_str
 299             )
 300         })
 301     }
 302 }
 303
 304 fn rewrite_comment_inner(
 305     orig: &str,
 306     block_style: bool,
 307     style: CommentStyle,
 308     shape: Shape,
 309     config: &Config,
 310     is_doc_comment: bool,
 311 ) -> Option<String> {
 312     let (opener, closer, line_start) = if block_style {
 313         CommentStyle::SingleBullet.to_str_tuplet()
 314     } else {
 315         comment_style(orig, config.normalize_comments()).to_str_tuplet()
 316     };
 317
 318     let max_chars = shape
 319         .width
 320         .checked_sub(closer.len() + opener.len())
 321         .unwrap_or(1);
 322     let indent_str = shape.indent.to_string_with_newline(config);
 323     let fmt_indent = shape.indent + (opener.len() - line_start.len());
 324     let mut fmt = StringFormat {
 325         opener: "",
 326         closer: "",
 327         line_start,
 328         line_end: "",
 329         shape: Shape::legacy(max_chars, fmt_indent),
 330         trim_end: true,
 331         config,
 332     };
 333
 334     let line_breaks = count_newlines(orig.trim_right());
 335     let lines = orig.lines()
 336         .enumerate()
 337         .map(|(i, mut line)| {
 338             line = trim_right_unless_two_whitespaces(line.trim_left(), is_doc_comment);
 339             // Drop old closer.
 340             if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
 341                 line = line[..(line.len() - 2)].trim_right();
 342             }
 343
 344             line
 345         })
 346         .map(|s| left_trim_comment_line(s, &style))
 347         .map(|(line, has_leading_whitespace)| {
 348             if orig.starts_with("/*") && line_breaks == 0 {
 349                 (
 350                     line.trim_left(),
 351                     has_leading_whitespace || config.normalize_comments(),
 352                 )
 353             } else {
 354                 (line, has_leading_whitespace || config.normalize_comments())
 355             }
 356         });
 357
 358     let mut result = String::with_capacity(orig.len() * 2);
 359     result.push_str(opener);
 360     let mut code_block_buffer = String::with_capacity(128);
 361     let mut is_prev_line_multi_line = false;
 362     let mut inside_code_block = false;
 363     let comment_line_separator = format!("{}{}", indent_str, line_start);
 364     let join_code_block_with_comment_line_separator = |s: &str| {
 365         let mut result = String::with_capacity(s.len() + 128);
 366         let mut iter = s.lines().peekable();
 367         while let Some(line) = iter.next() {
 368             result.push_str(line);
 369             result.push_str(match iter.peek() {
 370                 Some(next_line) if next_line.is_empty() => comment_line_separator.trim_right(),
 371                 Some(..) => &comment_line_separator,
 372                 None => "",
 373             });
 374         }
 375         result
 376     };
 377
 378     for (i, (line, has_leading_whitespace)) in lines.enumerate() {
 379         let is_last = i == count_newlines(orig);
 380
 381         if inside_code_block {
 382             if line.starts_with("```") {
 383                 inside_code_block = false;
 384                 result.push_str(&comment_line_separator);
 385                 let code_block = ::format_code_block(&code_block_buffer, config)
 386                     .unwrap_or_else(|| code_block_buffer.to_owned());
 387                 result.push_str(&join_code_block_with_comment_line_separator(&code_block));
 388                 code_block_buffer.clear();
 389                 result.push_str(&comment_line_separator);
 390                 result.push_str(line);
 391             } else {
 392                 code_block_buffer.push_str(line);
 393                 code_block_buffer.push('\n');
 394             }
 395
 396             continue;
 397         } else {
 398             inside_code_block = line.starts_with("```rust");
 399
 400             if result == opener {
 401                 let force_leading_whitespace = opener == "/* " && count_newlines(orig) == 0;
 402                 if !has_leading_whitespace && !force_leading_whitespace && result.ends_with(' ') {
 403                     result.pop();
 404                 }
 405                 if line.is_empty() {
 406                     continue;
 407                 }
 408             } else if is_prev_line_multi_line && !line.is_empty() {
 409                 result.push(' ')
 410             } else if is_last && !closer.is_empty() && line.is_empty() {
 411                 result.push_str(&indent_str);
 412             } else {
 413                 result.push_str(&comment_line_separator);
 414                 if !has_leading_whitespace && result.ends_with(' ') {
 415                     result.pop();
 416                 }
 417             }
 418         }
 419
 420         if config.wrap_comments() && line.len() > fmt.shape.width && !has_url(line) {
 421             match rewrite_string(line, &fmt, Some(max_chars)) {
 422                 Some(ref s) => {
 423                     is_prev_line_multi_line = s.contains('\n');
 424                     result.push_str(s);
 425                 }
 426                 None if is_prev_line_multi_line => {
 427                     // We failed to put the current `line` next to the previous `line`.
 428                     // Remove the trailing space, then start rewrite on the next line.
 429                     result.pop();
 430                     result.push_str(&comment_line_separator);
 431                     fmt.shape = Shape::legacy(max_chars, fmt_indent);
 432                     match rewrite_string(line, &fmt, Some(max_chars)) {
 433                         Some(ref s) => {
 434                             is_prev_line_multi_line = s.contains('\n');
 435                             result.push_str(s);
 436                         }
 437                         None => {
 438                             is_prev_line_multi_line = false;
 439                             result.push_str(line);
 440                         }
 441                     }
 442                 }
 443                 None => {
 444                     is_prev_line_multi_line = false;
 445                     result.push_str(line);
 446                 }
 447             }
 448
 449             fmt.shape = if is_prev_line_multi_line {
 450                 // 1 = " "
 451                 let offset = 1 + last_line_width(&result) - line_start.len();
 452                 Shape {
 453                     width: max_chars.checked_sub(offset).unwrap_or(0),
 454                     indent: fmt_indent,
 455                     offset: fmt.shape.offset + offset,
 456                 }
 457             } else {
 458                 Shape::legacy(max_chars, fmt_indent)
 459             };
 460         } else {
 461             if line.is_empty() && result.ends_with(' ') && !is_last {
 462                 // Remove space if this is an empty comment or a doc comment.
 463                 result.pop();
 464             }
 465             result.push_str(line);
 466             fmt.shape = Shape::legacy(max_chars, fmt_indent);
 467             is_prev_line_multi_line = false;
 468         }
 469     }
 470
 471     result.push_str(closer);
 472     if result.ends_with(opener) && opener.ends_with(' ') {
 473         // Trailing space.
 474         result.pop();
 475     }
 476
 477     Some(result)
 478 }
 479
 480 /// Returns true if the given string MAY include URLs or alike.
 481 fn has_url(s: &str) -> bool {
 482     // This function may return false positive, but should get its job done in most cases.
 483     s.contains("https://") || s.contains("http://") || s.contains("ftp://") || s.contains("file://")
 484 }
 485
 486 /// Given the span, rewrite the missing comment inside it if available.
 487 /// Note that the given span must only include comments (or leading/trailing whitespaces).
 488 pub fn rewrite_missing_comment(
 489     span: Span,
 490     shape: Shape,
 491     context: &RewriteContext,
 492 ) -> Option<String> {
 493     let missing_snippet = context.snippet(span);
 494     let trimmed_snippet = missing_snippet.trim();
 495     if !trimmed_snippet.is_empty() {
 496         rewrite_comment(trimmed_snippet, false, shape, context.config)
 497     } else {
 498         Some(String::new())
 499     }
 500 }
 501
 502 /// Recover the missing comments in the specified span, if available.
 503 /// The layout of the comments will be preserved as long as it does not break the code
 504 /// and its total width does not exceed the max width.
 505 pub fn recover_missing_comment_in_span(
 506     span: Span,
 507     shape: Shape,
 508     context: &RewriteContext,
 509     used_width: usize,
 510 ) -> Option<String> {
 511     let missing_comment = rewrite_missing_comment(span, shape, context)?;
 512     if missing_comment.is_empty() {
 513         Some(String::new())
 514     } else {
 515         let missing_snippet = context.snippet(span);
 516         let pos = missing_snippet.chars().position(|c| c == '/').unwrap_or(0);
 517         // 1 = ` `
 518         let total_width = missing_comment.len() + used_width + 1;
 519         let force_new_line_before_comment =
 520             missing_snippet[..pos].contains('\n') || total_width > context.config.max_width();
 521         let sep = if force_new_line_before_comment {
 522             shape.indent.to_string_with_newline(context.config)
 523         } else {
 524             Cow::from(" ")
 525         };
 526         Some(format!("{}{}", sep, missing_comment))
 527     }
 528 }
 529
 530 /// Trim trailing whitespaces unless they consist of two or more whitespaces.
 531 fn trim_right_unless_two_whitespaces(s: &str, is_doc_comment: bool) -> &str {
 532     if is_doc_comment && s.ends_with("  ") {
 533         s
 534     } else {
 535         s.trim_right()
 536     }
 537 }
 538
 539 /// Trims whitespace and aligns to indent, but otherwise does not change comments.
 540 fn light_rewrite_comment(
 541     orig: &str,
 542     offset: Indent,
 543     config: &Config,
 544     is_doc_comment: bool,
 545 ) -> Option<String> {
 546     let lines: Vec<&str> = orig.lines()
 547         .map(|l| {
 548             // This is basically just l.trim(), but in the case that a line starts
 549             // with `*` we want to leave one space before it, so it aligns with the
 550             // `*` in `/*`.
 551             let first_non_whitespace = l.find(|c| !char::is_whitespace(c));
 552             let left_trimmed = if let Some(fnw) = first_non_whitespace {
 553                 if l.as_bytes()[fnw] == b'*' && fnw > 0 {
 554                     &l[fnw - 1..]
 555                 } else {
 556                     &l[fnw..]
 557                 }
 558             } else {
 559                 ""
 560             };
 561             // Preserve markdown's double-space line break syntax in doc comment.
 562             trim_right_unless_two_whitespaces(left_trimmed, is_doc_comment)
 563         })
 564         .collect();
 565     Some(lines.join(&format!("\n{}", offset.to_string(config))))
 566 }
 567
 568 /// Trims comment characters and possibly a single space from the left of a string.
 569 /// Does not trim all whitespace. If a single space is trimmed from the left of the string,
 570 /// this function returns true.
 571 fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle) -> (&'a str, bool) {
 572     if line.starts_with("//! ") || line.starts_with("/// ") || line.starts_with("/*! ")
 573         || line.starts_with("/** ")
 574     {
 575         (&line[4..], true)
 576     } else if let CommentStyle::Custom(opener) = *style {
 577         if line.starts_with(opener) {
 578             (&line[opener.len()..], true)
 579         } else {
 580             (&line[opener.trim_right().len()..], false)
 581         }
 582     } else if line.starts_with("/* ") || line.starts_with("// ") || line.starts_with("//!")
 583         || line.starts_with("///") || line.starts_with("** ")
 584         || line.starts_with("/*!")
 585         || (line.starts_with("/**") && !line.starts_with("/**/"))
 586     {
 587         (&line[3..], line.chars().nth(2).unwrap() == ' ')
 588     } else if line.starts_with("/*") || line.starts_with("* ") || line.starts_with("//")
 589         || line.starts_with("**")
 590     {
 591         (&line[2..], line.chars().nth(1).unwrap() == ' ')
 592     } else if line.starts_with('*') {
 593         (&line[1..], false)
 594     } else {
 595         (line, line.starts_with(' '))
 596     }
 597 }
 598
/// Substring search that skips over commented-out text.
pub trait FindUncommented {
    /// Looks for `pat` in `self`, ignoring characters that belong to a
    /// comment, and returns the byte offset at which the match starts.
    fn find_uncommented(&self, pat: &str) -> Option<usize>;
}
 602
 603 impl FindUncommented for str {
 604     fn find_uncommented(&self, pat: &str) -> Option<usize> {
 605         let mut needle_iter = pat.chars();
 606         for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
 607             match needle_iter.next() {
 608                 None => {
 609                     return Some(i - pat.len());
 610                 }
 611                 Some(c) => match kind {
 612                     FullCodeCharKind::Normal | FullCodeCharKind::InString if b == c => {}
 613                     _ => {
 614                         needle_iter = pat.chars();
 615                     }
 616                 },
 617             }
 618         }
 619
 620         // Handle case where the pattern is a suffix of the search string
 621         match needle_iter.next() {
 622             Some(_) => None,
 623             None => Some(self.len() - pat.len()),
 624         }
 625     }
 626 }
 627
 628 // Returns the first byte position after the first comment. The given string
 629 // is expected to be prefixed by a comment, including delimiters.
 630 // Good: "/* /* inner */ outer */ code();"
 631 // Bad:  "code(); // hello\n world!"
 632 pub fn find_comment_end(s: &str) -> Option<usize> {
 633     let mut iter = CharClasses::new(s.char_indices());
 634     for (kind, (i, _c)) in &mut iter {
 635         if kind == FullCodeCharKind::Normal || kind == FullCodeCharKind::InString {
 636             return Some(i);
 637         }
 638     }
 639
 640     // Handle case where the comment ends at the end of s.
 641     if iter.status == CharClassesStatus::Normal {
 642         Some(s.len())
 643     } else {
 644         None
 645     }
 646 }
 647
 648 /// Returns true if text contains any comment.
 649 pub fn contains_comment(text: &str) -> bool {
 650     CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
 651 }
 652
 653 /// Remove trailing spaces from the specified snippet. We do not remove spaces
 654 /// inside strings or comments.
 655 pub fn remove_trailing_white_spaces(text: &str) -> String {
 656     let mut buffer = String::with_capacity(text.len());
 657     let mut space_buffer = String::with_capacity(128);
 658     for (char_kind, c) in CharClasses::new(text.chars()) {
 659         match c {
 660             '\n' => {
 661                 if char_kind == FullCodeCharKind::InString {
 662                     buffer.push_str(&space_buffer);
 663                 }
 664                 space_buffer.clear();
 665                 buffer.push('\n');
 666             }
 667             _ if c.is_whitespace() => {
 668                 space_buffer.push(c);
 669             }
 670             _ => {
 671                 if !space_buffer.is_empty() {
 672                     buffer.push_str(&space_buffer);
 673                     space_buffer.clear();
 674                 }
 675                 buffer.push(c);
 676             }
 677         }
 678     }
 679     buffer
 680 }
 681
/// Iterator adaptor that pairs every item of an underlying character iterator
/// with a `FullCodeCharKind`, keeping track of string, char-literal and
/// comment state while it advances.
pub struct CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    // Underlying character source; peekable because classifying a character
    // can require looking at the one that follows it (e.g. `/` then `*`).
    base: iter::Peekable<T>,
    // Lexical state reached after the most recently yielded item.
    status: CharClassesStatus,
}
 690
 691 pub trait RichChar {
 692     fn get_char(&self) -> char;
 693 }
 694
 695 impl RichChar for char {
 696     fn get_char(&self) -> char {
 697         *self
 698     }
 699 }
 700
 701 impl RichChar for (usize, char) {
 702     fn get_char(&self) -> char {
 703         self.1
 704     }
 705 }
 706
/// Internal state of `CharClasses` between two characters.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum CharClassesStatus {
    // Outside of any string literal, char literal or comment.
    Normal,
    // Inside a string literal (after the opening `"`).
    LitString,
    // Inside a string literal, right after a backslash.
    LitStringEscape,
    // Inside a char literal (after the opening `'`).
    LitChar,
    // Inside a char literal, right after a backslash.
    LitCharEscape,
    // The u32 is the nesting deepness of the comment
    BlockComment(u32),
    // Status when the '/' has been consumed, but not yet the '*', deepness is
    // the new deepness (after the comment opening).
    BlockCommentOpening(u32),
    // Status when the '*' has been consumed, but not yet the '/', deepness is
    // the new deepness (after the comment closing).
    BlockCommentClosing(u32),
    // Inside a `//` line comment.
    LineComment,
}
 724
/// Distinguish between functional part of code and comments
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum CodeCharKind {
    /// The character is not part of a comment.
    Normal,
    /// The character is part of a comment.
    Comment,
}
 731
/// Distinguish between functional part of code and comments,
/// describing opening and closing of comments for ease when chunking
/// code from tagged characters
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum FullCodeCharKind {
    /// Any character that is tagged neither as part of a comment nor as
    /// being inside a string.
    Normal,
    /// The first character of a comment, there is only one for a comment (always '/')
    StartComment,
    /// Any character inside a comment including the second character of comment
    /// marks ("//", "/*")
    InComment,
    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
    EndComment,
    /// Inside a string.
    InString,
}
 748
 749 impl FullCodeCharKind {
 750     pub fn is_comment(&self) -> bool {
 751         match *self {
 752             FullCodeCharKind::StartComment
 753             | FullCodeCharKind::InComment
 754             | FullCodeCharKind::EndComment => true,
 755             _ => false,
 756         }
 757     }
 758
 759     pub fn is_string(&self) -> bool {
 760         *self == FullCodeCharKind::InString
 761     }
 762
 763     fn to_codecharkind(&self) -> CodeCharKind {
 764         if self.is_comment() {
 765             CodeCharKind::Comment
 766         } else {
 767             CodeCharKind::Normal
 768         }
 769     }
 770 }
 771
 772 impl<T> CharClasses<T>
 773 where
 774     T: Iterator,
 775     T::Item: RichChar,
 776 {
 777     pub fn new(base: T) -> CharClasses<T> {
 778         CharClasses {
 779             base: base.peekable(),
 780             status: CharClassesStatus::Normal,
 781         }
 782     }
 783 }
 784
/// Classifying iterator: yields every item of the underlying iterator paired with the
/// `FullCodeCharKind` the state machine in `self.status` assigns to its character.
impl<T> Iterator for CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    type Item = (FullCodeCharKind, T::Item);

    /// Consumes one item, advances the state machine, and returns the item together with its
    /// kind. Returns `None` once the underlying iterator is exhausted.
    ///
    /// Panics if the comment-related states are entered with an unexpected character or a
    /// zero nesting depth (see the assertions below).
    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = self.base.next()?;
        let chr = item.get_char();
        // Kind reported by the non-comment arms below unless they override it; the
        // comment-related arms set `self.status` themselves and return early instead.
        let mut char_kind = FullCodeCharKind::Normal;
        self.status = match self.status {
            CharClassesStatus::LitString => match chr {
                // Closing quote: leaves the string; it is reported with the default `Normal` kind.
                '"' => CharClassesStatus::Normal,
                '\\' => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitStringEscape
                }
                _ => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitString
                }
            },
            // The character following a backslash is always part of the string, whatever it is.
            CharClassesStatus::LitStringEscape => {
                char_kind = FullCodeCharKind::InString;
                CharClassesStatus::LitString
            }
            // Inside a char literal every character keeps the default `Normal` kind.
            CharClassesStatus::LitChar => match chr {
                '\\' => CharClassesStatus::LitCharEscape,
                '\'' => CharClassesStatus::Normal,
                _ => CharClassesStatus::LitChar,
            },
            CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
            CharClassesStatus::Normal => match chr {
                // Opening quote: already reported as part of the string.
                '"' => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitString
                }
                '\'' => CharClassesStatus::LitChar,
                // A slash starts a comment only when the next character is '*' or '/'.
                '/' => match self.base.peek() {
                    Some(next) if next.get_char() == '*' => {
                        self.status = CharClassesStatus::BlockCommentOpening(1);
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    Some(next) if next.get_char() == '/' => {
                        self.status = CharClassesStatus::LineComment;
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    _ => CharClassesStatus::Normal,
                },
                _ => CharClassesStatus::Normal,
            },
            // `deepness` is the current block-comment nesting depth.
            CharClassesStatus::BlockComment(deepness) => {
                assert_ne!(deepness, 0);
                // Look one character ahead to spot a closing `*/` or a nested opening `/*`;
                // the second character of the pair is handled by the dedicated states below.
                self.status = match self.base.peek() {
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
                    }
                    Some(next) if next.get_char() == '*' && chr == '/' => {
                        CharClassesStatus::BlockCommentOpening(deepness + 1)
                    }
                    _ => CharClassesStatus::BlockComment(deepness),
                };
                return Some((FullCodeCharKind::InComment, item));
            }
            // The current character is the '*' completing a `/*` pair.
            CharClassesStatus::BlockCommentOpening(deepness) => {
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
                return Some((FullCodeCharKind::InComment, item));
            }
            // The current character is the '/' completing a `*/` pair; `deepness` is the depth
            // that remains once this pair is closed.
            CharClassesStatus::BlockCommentClosing(deepness) => {
                assert_eq!(chr, '/');
                if deepness == 0 {
                    // Outermost comment closed: this slash is the last comment character.
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                } else {
                    self.status = CharClassesStatus::BlockComment(deepness);
                    return Some((FullCodeCharKind::InComment, item));
                }
            }
            CharClassesStatus::LineComment => match chr {
                // The newline terminates the line comment and is reported as its last character.
                '\n' => {
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                }
                _ => {
                    self.status = CharClassesStatus::LineComment;
                    return Some((FullCodeCharKind::InComment, item));
                }
            },
        };
        Some((char_kind, item))
    }
}
 879
/// Iterator over functional and commented parts of a string. Any part of a string is either
/// functional code, *one* block comment, or *one* line comment. Whitespace between
/// comments is functional code. Line comments contain their ending newlines.
struct UngroupedCommentCodeSlices<'a> {
    /// The complete input text; every yielded part is a subslice of it.
    slice: &'a str,
    /// Classified `(byte index, char)` pairs of `slice`, peekable so that the start of the
    /// following part can be inspected without consuming it.
    iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
}
 887
 888 impl<'a> UngroupedCommentCodeSlices<'a> {
 889     fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
 890         UngroupedCommentCodeSlices {
 891             slice: code,
 892             iter: CharClasses::new(code.char_indices()).peekable(),
 893         }
 894     }
 895 }
 896
 897 impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
 898     type Item = (CodeCharKind, usize, &'a str);
 899
 900     fn next(&mut self) -> Option<Self::Item> {
 901         let (kind, (start_idx, _)) = self.iter.next()?;
 902         match kind {
 903             FullCodeCharKind::Normal | FullCodeCharKind::InString => {
 904                 // Consume all the Normal code
 905                 while let Some(&(char_kind, _)) = self.iter.peek() {
 906                     if char_kind.is_comment() {
 907                         break;
 908                     }
 909                     let _ = self.iter.next();
 910                 }
 911             }
 912             FullCodeCharKind::StartComment => {
 913                 // Consume the whole comment
 914                 while let Some((FullCodeCharKind::InComment, (_, _))) = self.iter.next() {}
 915             }
 916             _ => panic!(),
 917         }
 918         let slice = match self.iter.peek() {
 919             Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
 920             None => &self.slice[start_idx..],
 921         };
 922         Some((
 923             if kind.is_comment() {
 924                 CodeCharKind::Comment
 925             } else {
 926                 CodeCharKind::Normal
 927             },
 928             start_idx,
 929             slice,
 930         ))
 931     }
 932 }
 933
/// Iterator over an alternating sequence of functional and commented parts of
/// a string. The first item is always a, possibly zero length, subslice of
/// functional text. Line style comments contain their ending newlines.
pub struct CommentCodeSlices<'a> {
    /// The complete input text; every yielded part is a subslice of it.
    slice: &'a str,
    /// Kind of the part returned most recently; the next part has the opposite kind.
    last_slice_kind: CodeCharKind,
    /// Byte offset into `slice` at which the most recently returned part ended.
    last_slice_end: usize,
}
 942
 943 impl<'a> CommentCodeSlices<'a> {
 944     pub fn new(slice: &'a str) -> CommentCodeSlices<'a> {
 945         CommentCodeSlices {
 946             slice,
 947             last_slice_kind: CodeCharKind::Comment,
 948             last_slice_end: 0,
 949         }
 950     }
 951 }
 952
 953 impl<'a> Iterator for CommentCodeSlices<'a> {
 954     type Item = (CodeCharKind, usize, &'a str);
 955
 956     fn next(&mut self) -> Option<Self::Item> {
 957         if self.last_slice_end == self.slice.len() {
 958             return None;
 959         }
 960
 961         let mut sub_slice_end = self.last_slice_end;
 962         let mut first_whitespace = None;
 963         let subslice = &self.slice[self.last_slice_end..];
 964         let mut iter = CharClasses::new(subslice.char_indices());
 965
 966         for (kind, (i, c)) in &mut iter {
 967             let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal
 968                 && &subslice[..2] == "//"
 969                 && [' ', '\t'].contains(&c);
 970
 971             if is_comment_connector && first_whitespace.is_none() {
 972                 first_whitespace = Some(i);
 973             }
 974
 975             if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
 976                 let last_index = match first_whitespace {
 977                     Some(j) => j,
 978                     None => i,
 979                 };
 980                 sub_slice_end = self.last_slice_end + last_index;
 981                 break;
 982             }
 983
 984             if !is_comment_connector {
 985                 first_whitespace = None;
 986             }
 987         }
 988
 989         if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
 990             // This was the last subslice.
 991             sub_slice_end = match first_whitespace {
 992                 Some(i) => self.last_slice_end + i,
 993                 None => self.slice.len(),
 994             };
 995         }
 996
 997         let kind = match self.last_slice_kind {
 998             CodeCharKind::Comment => CodeCharKind::Normal,
 999             CodeCharKind::Normal => CodeCharKind::Comment,
1000         };
1001         let res = (
1002             kind,
1003             self.last_slice_end,
1004             &self.slice[self.last_slice_end..sub_slice_end],
1005         );
1006         self.last_slice_end = sub_slice_end;
1007         self.last_slice_kind = kind;
1008
1009         Some(res)
1010     }
1011 }
1012
1013 /// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
1014 /// (if it fits in the width/offset, else return None), else return `new`
1015 pub fn recover_comment_removed(
1016     new: String,
1017     span: Span,
1018     context: &RewriteContext,
1019 ) -> Option<String> {
1020     let snippet = context.snippet(span);
1021     if snippet != new && changed_comment_content(snippet, &new) {
1022         // We missed some comments. Keep the original text.
1023         Some(snippet.to_owned())
1024     } else {
1025         Some(new)
1026     }
1027 }
1028
1029 /// Return true if the two strings of code have the same payload of comments.
1030 /// The payload of comments is everything in the string except:
1031 ///     - actual code (not comments)
1032 ///     - comment start/end marks
1033 ///     - whitespace
1034 ///     - '*' at the beginning of lines in block comments
1035 fn changed_comment_content(orig: &str, new: &str) -> bool {
1036     // Cannot write this as a fn since we cannot return types containing closures
1037     let code_comment_content = |code| {
1038         let slices = UngroupedCommentCodeSlices::new(code);
1039         slices
1040             .filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
1041             .flat_map(|(_, _, s)| CommentReducer::new(s))
1042     };
1043     let res = code_comment_content(orig).ne(code_comment_content(new));
1044     debug!(
1045         "comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
1046         res,
1047         orig,
1048         new,
1049         code_comment_content(orig).collect::<String>(),
1050         code_comment_content(new).collect::<String>()
1051     );
1052     res
1053 }
1054
/// Iterator over the 'payload' characters of a comment.
/// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
/// for example).
struct CommentReducer<'a> {
    /// Whether the comment is a block comment (`/* ... */`); only block comments get the
    /// leading-'*' treatment.
    is_block: bool,
    /// True while the decoration of the current line (indentation and an optional '*') has
    /// not been passed yet.
    at_start_line: bool,
    /// Characters of the comment with its start/end marks already removed.
    iter: std::str::Chars<'a>,
}

impl<'a> CommentReducer<'a> {
    /// Creates a reducer for the single comment `comment`.
    ///
    /// Panics if `comment` does not start with a comment opener.
    fn new(comment: &'a str) -> CommentReducer<'a> {
        let is_block = comment.starts_with("/*");
        let comment = remove_comment_header(comment);
        CommentReducer {
            is_block,
            at_start_line: false, // There are no supplementary '*' on the first line
            iter: comment.chars(),
        }
    }
}

impl<'a> Iterator for CommentReducer<'a> {
    type Item = char;

    /// Returns the next non-whitespace payload character, or `None` at the end of the comment.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut c = self.iter.next()?;
            if self.is_block && self.at_start_line {
                // Skip the indentation (and any blank lines) in front of the line's content.
                while c.is_whitespace() {
                    c = self.iter.next()?;
                }
                // Ignore leading '*'
                if c == '*' {
                    c = self.iter.next()?;
                }
                // The decoration has been consumed, so later '*' on this line are payload.
                // A line consisting of the decoration alone ends right here; in that case the
                // next character is again at the start of a line.
                self.at_start_line = c == '\n';
            } else if c == '\n' {
                self.at_start_line = true;
            }
            if !c.is_whitespace() {
                return Some(c);
            }
        }
    }
}

/// Strips the opening mark (`//`, `///`, `//!`, `/*`, `/**` or `/*!`) from `comment` and, for
/// block comments, the closing `*/` when it is present. An unterminated block comment keeps
/// all of its remaining text.
///
/// Panics if `comment` does not start with a comment opener.
fn remove_comment_header(comment: &str) -> &str {
    if comment.starts_with("///") || comment.starts_with("//!") {
        return &comment[3..];
    }
    if comment.starts_with("//") {
        return &comment[2..];
    }

    // `/**/` is an empty plain block comment, not the opener of a doc comment.
    let is_doc_block = (comment.starts_with("/**") && !comment.starts_with("/**/"))
        || comment.starts_with("/*!");
    let opener_len = if is_doc_block {
        3
    } else {
        assert!(
            comment.starts_with("/*"),
            "string '{}' is not a comment",
            comment
        );
        2
    };

    // Only drop the closing mark when it is really there: blindly cutting the last two bytes
    // panics on very short unterminated comments and eats payload on longer ones.
    let body = &comment[opener_len..];
    if body.ends_with("*/") {
        &body[..body.len() - 2]
    } else {
        body
    }
}
1117
// Unit tests for the comment classification, slicing, rewriting and search helpers.
#[cfg(test)]
mod test {
    use super::{contains_comment, rewrite_comment, CharClasses, CodeCharKind, CommentCodeSlices,
                FindUncommented, FullCodeCharKind};
    use shape::{Indent, Shape};

    // An empty line comment followed by a blank line: the terminating newline belongs to the
    // comment (`EndComment`), the newline after it is ordinary code.
    #[test]
    fn char_classes() {
        let mut iter = CharClasses::new("//\n\n".chars());

        assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // A block comment in the middle of code splits the input into code / comment / code.
    #[test]
    fn comment_code_slices() {
        let input = "code(); /* test */ 1 + 1";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, "code(); "), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 8, "/* test */"),
            iter.next().unwrap()
        );
        assert_eq!((CodeCharKind::Normal, 18, " 1 + 1"), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // Input that opens with a comment still yields a (zero-length) code slice first, and the
    // line comment keeps its trailing newline.
    #[test]
    fn comment_code_slices_two() {
        let input = "// comment\n    test();";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, ""), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 0, "// comment\n"),
            iter.next().unwrap()
        );
        assert_eq!(
            (CodeCharKind::Normal, 11, "    test();"),
            iter.next().unwrap()
        );
        assert_eq!(None, iter.next());
    }

    // Consecutive line comments separated only by indentation are merged into one comment
    // slice; the blank line after them is code.
    #[test]
    fn comment_code_slices_three() {
        let input = "1 // comment\n    // comment2\n\n";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 2, "// comment\n    // comment2\n"),
            iter.next().unwrap()
        );
        assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // Exercises `rewrite_comment` with `wrap_comments` and `normalize_comments` both enabled.
    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn format_comments() {
        let mut config: ::config::Config = Default::default();
        config.set().wrap_comments(true);
        config.set().normalize_comments(true);

        let comment = rewrite_comment(" //test",
                                      true,
                                      Shape::legacy(100, Indent::new(0, 100)),
                                      &config).unwrap();
        assert_eq!("/* test */", comment);

        // A width of 10 forces the comment to wrap onto a second line.
        let comment = rewrite_comment("// comment on a",
                                      false,
                                      Shape::legacy(10, Indent::empty()),
                                      &config).unwrap();
        assert_eq!("// comment\n// on a", comment);

        // The continuation line is re-indented from 13 to 12 columns.
        let comment = rewrite_comment("//  A multi line comment\n             // between args.",
                                      false,
                                      Shape::legacy(60, Indent::new(0, 12)),
                                      &config).unwrap();
        assert_eq!("//  A multi line comment\n            // between args.", comment);

        let input = "// comment";
        let expected =
            "/* comment */";
        let comment = rewrite_comment(input,
                                      true,
                                      Shape::legacy(9, Indent::new(0, 69)),
                                      &config).unwrap();
        assert_eq!(expected, comment);

        // Padding inside a block comment is collapsed to a single space on each side.
        let comment = rewrite_comment("/*   trimmed    */",
                                      true,
                                      Shape::legacy(100, Indent::new(0, 100)),
                                      &config).unwrap();
        assert_eq!("/* trimmed */", comment);
    }

    // This is probably intended to be a non-test fn, but it is only used by the tests below.
    // It returns `text` with every comment character removed: only characters classified as
    // `Normal` or `InString` are kept.
    fn uncommented(text: &str) -> String {
        CharClasses::new(text.chars())
            .filter_map(|(s, c)| match s {
                FullCodeCharKind::Normal | FullCodeCharKind::InString => Some(c),
                _ => None,
            })
            .collect()
    }

    // Covers a trailing block comment, nested block comments mixed with line comments, and
    // comment markers inside a string literal (which must be left alone).
    #[test]
    fn test_uncommented() {
        assert_eq!(&uncommented("abc/*...*/"), "abc");
        assert_eq!(
            &uncommented("// .... /* \n../* /* *** / */ */a/* // */c\n"),
            "..ac\n"
        );
        assert_eq!(&uncommented("abc \" /* */\" qsdf"), "abc \" /* */\" qsdf");
    }

    // An unterminated block comment still counts as a comment; comment markers inside a
    // string literal do not.
    #[test]
    fn test_contains_comment() {
        assert_eq!(contains_comment("abc"), false);
        assert_eq!(contains_comment("abc // qsdf"), true);
        assert_eq!(contains_comment("abc /* kqsdf"), true);
        assert_eq!(contains_comment("abc \" /* */\" qsdf"), false);
    }

    // `find_uncommented` must skip matches inside comments. The expected positions are byte
    // offsets, as the case with the two-byte `π` shows.
    #[test]
    fn test_find_uncommented() {
        fn check(haystack: &str, needle: &str, expected: Option<usize>) {
            assert_eq!(expected, haystack.find_uncommented(needle));
        }

        check("/*/ */test", "test", Some(6));
        check("//test\ntest", "test", Some(7));
        check("/* comment only */", "whatever", None);
        check(
            "/* comment */ some text /* more commentary */ result",
            "result",
            Some(46),
        );
        check("sup // sup", "p", Some(2));
        check("sup", "x", None);
        check(r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9));
        check("/*sup yo? \n sup*/ sup", "p", Some(20));
        check("hel/*lohello*/lo", "hello", None);
        check("acb", "ab", None);
        check(",/*A*/ ", ",", Some(0));
        check("abc", "abc", Some(0));
        check("/* abc */", "abc", None);
        check("/**/abc/* */", "abc", Some(4));
        check("\"/* abc */\"", "abc", Some(4));
        check("\"/* abc", "abc", Some(4));
    }
}