src/comment.rs

   1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 // Formatting and tools for comments.
  12
  13 use std::{self, iter, borrow::Cow};
  14
  15 use itertools::{multipeek, MultiPeek};
  16 use syntax::codemap::Span;
  17
  18 use config::Config;
  19 use rewrite::RewriteContext;
  20 use shape::{Indent, Shape};
  21 use string::{rewrite_string, StringFormat};
  22 use utils::{count_newlines, first_line_width, last_line_width};
  23
  24 fn is_custom_comment(comment: &str) -> bool {
  25     if !comment.starts_with("//") {
  26         false
  27     } else if let Some(c) = comment.chars().nth(2) {
  28         !c.is_alphanumeric() && !c.is_whitespace()
  29     } else {
  30         false
  31     }
  32 }
  33
/// The syntactic flavour of a comment, used to pick the opener, closer and
/// per-line prefix when a comment is rewritten.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum CommentStyle<'a> {
    /// Line comment opened with `// `.
    DoubleSlash,
    /// Line comment opened with `/// `.
    TripleSlash,
    /// Line comment opened with `//! `.
    Doc,
    /// Block comment opened with `/* ` and closed with ` */`.
    SingleBullet,
    /// Block comment opened with `/** ` and closed with ` **/`.
    DoubleBullet,
    /// Block comment opened with `/*! ` and closed with ` */`.
    Exclamation,
    /// Line comment with a custom opener; the borrowed string is used both as
    /// the opener and as the prefix of every following line.
    Custom(&'a str),
}
  44
  45 fn custom_opener(s: &str) -> &str {
  46     s.lines().next().map_or("", |first_line| {
  47         first_line
  48             .find(' ')
  49             .map_or(first_line, |space_index| &first_line[0..space_index + 1])
  50     })
  51 }
  52
  53 impl<'a> CommentStyle<'a> {
  54     pub fn is_doc_comment(&self) -> bool {
  55         match *self {
  56             CommentStyle::TripleSlash | CommentStyle::Doc => true,
  57             _ => false,
  58         }
  59     }
  60
  61     pub fn opener(&self) -> &'a str {
  62         match *self {
  63             CommentStyle::DoubleSlash => "// ",
  64             CommentStyle::TripleSlash => "/// ",
  65             CommentStyle::Doc => "//! ",
  66             CommentStyle::SingleBullet => "/* ",
  67             CommentStyle::DoubleBullet => "/** ",
  68             CommentStyle::Exclamation => "/*! ",
  69             CommentStyle::Custom(opener) => opener,
  70         }
  71     }
  72
  73     pub fn closer(&self) -> &'a str {
  74         match *self {
  75             CommentStyle::DoubleSlash
  76             | CommentStyle::TripleSlash
  77             | CommentStyle::Custom(..)
  78             | CommentStyle::Doc => "",
  79             CommentStyle::DoubleBullet => " **/",
  80             CommentStyle::SingleBullet | CommentStyle::Exclamation => " */",
  81         }
  82     }
  83
  84     pub fn line_start(&self) -> &'a str {
  85         match *self {
  86             CommentStyle::DoubleSlash => "// ",
  87             CommentStyle::TripleSlash => "/// ",
  88             CommentStyle::Doc => "//! ",
  89             CommentStyle::SingleBullet | CommentStyle::Exclamation => " * ",
  90             CommentStyle::DoubleBullet => " ** ",
  91             CommentStyle::Custom(opener) => opener,
  92         }
  93     }
  94
  95     pub fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
  96         (self.opener(), self.closer(), self.line_start())
  97     }
  98
  99     pub fn line_with_same_comment_style(&self, line: &str, normalize_comments: bool) -> bool {
 100         match *self {
 101             CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
 102                 line.trim_left().starts_with(self.line_start().trim_left())
 103                     || comment_style(line, normalize_comments) == *self
 104             }
 105             CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
 106                 line.trim_left().starts_with(self.closer().trim_left())
 107                     || line.trim_left().starts_with(self.line_start().trim_left())
 108                     || comment_style(line, normalize_comments) == *self
 109             }
 110             CommentStyle::Custom(opener) => line.trim_left().starts_with(opener.trim_right()),
 111         }
 112     }
 113 }
 114
 115 fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle {
 116     if !normalize_comments {
 117         if orig.starts_with("/**") && !orig.starts_with("/**/") {
 118             CommentStyle::DoubleBullet
 119         } else if orig.starts_with("/*!") {
 120             CommentStyle::Exclamation
 121         } else if orig.starts_with("/*") {
 122             CommentStyle::SingleBullet
 123         } else if orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/') {
 124             CommentStyle::TripleSlash
 125         } else if orig.starts_with("//!") {
 126             CommentStyle::Doc
 127         } else if is_custom_comment(orig) {
 128             CommentStyle::Custom(custom_opener(orig))
 129         } else {
 130             CommentStyle::DoubleSlash
 131         }
 132     } else if (orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/'))
 133         || (orig.starts_with("/**") && !orig.starts_with("/**/"))
 134     {
 135         CommentStyle::TripleSlash
 136     } else if orig.starts_with("//!") || orig.starts_with("/*!") {
 137         CommentStyle::Doc
 138     } else if is_custom_comment(orig) {
 139         CommentStyle::Custom(custom_opener(orig))
 140     } else {
 141         CommentStyle::DoubleSlash
 142     }
 143 }
 144
 145 /// Combine `prev_str` and `next_str` into a single `String`. `span` may contain
 146 /// comments between two strings. If there are such comments, then that will be
 147 /// recovered. If `allow_extend` is true and there is no comment between the two
 148 /// strings, then they will be put on a single line as long as doing so does not
 149 /// exceed max width.
 150 pub fn combine_strs_with_missing_comments(
 151     context: &RewriteContext,
 152     prev_str: &str,
 153     next_str: &str,
 154     span: Span,
 155     shape: Shape,
 156     allow_extend: bool,
 157 ) -> Option<String> {
 158     let mut result =
 159         String::with_capacity(prev_str.len() + next_str.len() + shape.indent.width() + 128);
 160     result.push_str(prev_str);
 161     let mut allow_one_line = !prev_str.contains('\n') && !next_str.contains('\n');
 162     let first_sep = if prev_str.is_empty() || next_str.is_empty() {
 163         ""
 164     } else {
 165         " "
 166     };
 167     let mut one_line_width =
 168         last_line_width(prev_str) + first_line_width(next_str) + first_sep.len();
 169
 170     let config = context.config;
 171     let indent = shape.indent;
 172     let missing_comment = rewrite_missing_comment(span, shape, context)?;
 173
 174     if missing_comment.is_empty() {
 175         if allow_extend && prev_str.len() + first_sep.len() + next_str.len() <= shape.width {
 176             result.push_str(first_sep);
 177         } else if !prev_str.is_empty() {
 178             result.push_str(&indent.to_string_with_newline(config))
 179         }
 180         result.push_str(next_str);
 181         return Some(result);
 182     }
 183
 184     // We have a missing comment between the first expression and the second expression.
 185
 186     // Peek the the original source code and find out whether there is a newline between the first
 187     // expression and the second expression or the missing comment. We will preserve the original
 188     // layout whenever possible.
 189     let original_snippet = context.snippet(span);
 190     let prefer_same_line = if let Some(pos) = original_snippet.chars().position(|c| c == '/') {
 191         !original_snippet[..pos].contains('\n')
 192     } else {
 193         !original_snippet.contains('\n')
 194     };
 195
 196     one_line_width -= first_sep.len();
 197     let first_sep = if prev_str.is_empty() || missing_comment.is_empty() {
 198         Cow::from("")
 199     } else {
 200         let one_line_width = last_line_width(prev_str) + first_line_width(&missing_comment) + 1;
 201         if prefer_same_line && one_line_width <= shape.width {
 202             Cow::from(" ")
 203         } else {
 204             indent.to_string_with_newline(config)
 205         }
 206     };
 207     result.push_str(&first_sep);
 208     result.push_str(&missing_comment);
 209
 210     let second_sep = if missing_comment.is_empty() || next_str.is_empty() {
 211         Cow::from("")
 212     } else if missing_comment.starts_with("//") {
 213         indent.to_string_with_newline(config)
 214     } else {
 215         one_line_width += missing_comment.len() + first_sep.len() + 1;
 216         allow_one_line &= !missing_comment.starts_with("//") && !missing_comment.contains('\n');
 217         if prefer_same_line && allow_one_line && one_line_width <= shape.width {
 218             Cow::from(" ")
 219         } else {
 220             indent.to_string_with_newline(config)
 221         }
 222     };
 223     result.push_str(&second_sep);
 224     result.push_str(next_str);
 225
 226     Some(result)
 227 }
 228
 229 pub fn rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> Option<String> {
 230     _rewrite_comment(orig, false, shape, config, true)
 231 }
 232
 233 pub fn rewrite_comment(
 234     orig: &str,
 235     block_style: bool,
 236     shape: Shape,
 237     config: &Config,
 238 ) -> Option<String> {
 239     _rewrite_comment(orig, block_style, shape, config, false)
 240 }
 241
 242 fn _rewrite_comment(
 243     orig: &str,
 244     block_style: bool,
 245     shape: Shape,
 246     config: &Config,
 247     is_doc_comment: bool,
 248 ) -> Option<String> {
 249     // If there are lines without a starting sigil, we won't format them correctly
 250     // so in that case we won't even re-align (if !config.normalize_comments()) and
 251     // we should stop now.
 252     let num_bare_lines = orig.lines()
 253         .map(|line| line.trim())
 254         .filter(|l| !(l.starts_with('*') || l.starts_with("//") || l.starts_with("/*")))
 255         .count();
 256     if num_bare_lines > 0 && !config.normalize_comments() {
 257         return Some(orig.to_owned());
 258     }
 259     if !config.normalize_comments() && !config.wrap_comments() {
 260         return light_rewrite_comment(orig, shape.indent, config, is_doc_comment);
 261     }
 262
 263     identify_comment(orig, block_style, shape, config, is_doc_comment)
 264 }
 265
 266 fn identify_comment(
 267     orig: &str,
 268     block_style: bool,
 269     shape: Shape,
 270     config: &Config,
 271     is_doc_comment: bool,
 272 ) -> Option<String> {
 273     let style = comment_style(orig, false);
 274     let first_group = orig.lines()
 275         .take_while(|l| style.line_with_same_comment_style(l, false))
 276         .collect::<Vec<_>>()
 277         .join("\n");
 278     let rest = orig.lines()
 279         .skip(first_group.lines().count())
 280         .collect::<Vec<_>>()
 281         .join("\n");
 282
 283     let first_group_str = rewrite_comment_inner(
 284         &first_group,
 285         block_style,
 286         style,
 287         shape,
 288         config,
 289         is_doc_comment || style.is_doc_comment(),
 290     )?;
 291     if rest.is_empty() {
 292         Some(first_group_str)
 293     } else {
 294         identify_comment(&rest, block_style, shape, config, is_doc_comment).map(|rest_str| {
 295             format!(
 296                 "{}\n{}{}",
 297                 first_group_str,
 298                 shape.indent.to_string(config),
 299                 rest_str
 300             )
 301         })
 302     }
 303 }
 304
 305 fn rewrite_comment_inner(
 306     orig: &str,
 307     block_style: bool,
 308     style: CommentStyle,
 309     shape: Shape,
 310     config: &Config,
 311     is_doc_comment: bool,
 312 ) -> Option<String> {
 313     let (opener, closer, line_start) = if block_style {
 314         CommentStyle::SingleBullet.to_str_tuplet()
 315     } else {
 316         comment_style(orig, config.normalize_comments()).to_str_tuplet()
 317     };
 318
 319     let max_chars = shape
 320         .width
 321         .checked_sub(closer.len() + opener.len())
 322         .unwrap_or(1);
 323     let indent_str = shape.indent.to_string_with_newline(config);
 324     let fmt_indent = shape.indent + (opener.len() - line_start.len());
 325     let mut fmt = StringFormat {
 326         opener: "",
 327         closer: "",
 328         line_start,
 329         line_end: "",
 330         shape: Shape::legacy(max_chars, fmt_indent),
 331         trim_end: true,
 332         config,
 333     };
 334
 335     let line_breaks = count_newlines(orig.trim_right());
 336     let lines = orig.lines()
 337         .enumerate()
 338         .map(|(i, mut line)| {
 339             line = trim_right_unless_two_whitespaces(line.trim_left(), is_doc_comment);
 340             // Drop old closer.
 341             if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
 342                 line = line[..(line.len() - 2)].trim_right();
 343             }
 344
 345             line
 346         })
 347         .map(|s| left_trim_comment_line(s, &style))
 348         .map(|(line, has_leading_whitespace)| {
 349             if orig.starts_with("/*") && line_breaks == 0 {
 350                 (
 351                     line.trim_left(),
 352                     has_leading_whitespace || config.normalize_comments(),
 353                 )
 354             } else {
 355                 (line, has_leading_whitespace || config.normalize_comments())
 356             }
 357         });
 358
 359     let mut result = String::with_capacity(orig.len() * 2);
 360     result.push_str(opener);
 361     let mut code_block_buffer = String::with_capacity(128);
 362     let mut is_prev_line_multi_line = false;
 363     let mut inside_code_block = false;
 364     let comment_line_separator = format!("{}{}", indent_str, line_start);
 365     let join_code_block_with_comment_line_separator = |s: &str| {
 366         let mut result = String::with_capacity(s.len() + 128);
 367         let mut iter = s.lines().peekable();
 368         while let Some(line) = iter.next() {
 369             result.push_str(line);
 370             result.push_str(match iter.peek() {
 371                 Some(next_line) if next_line.is_empty() => comment_line_separator.trim_right(),
 372                 Some(..) => &comment_line_separator,
 373                 None => "",
 374             });
 375         }
 376         result
 377     };
 378
 379     for (i, (line, has_leading_whitespace)) in lines.enumerate() {
 380         let is_last = i == count_newlines(orig);
 381
 382         if inside_code_block {
 383             if line.starts_with("```") {
 384                 inside_code_block = false;
 385                 result.push_str(&comment_line_separator);
 386                 let code_block = ::format_code_block(&code_block_buffer, config)
 387                     .unwrap_or_else(|| code_block_buffer.to_owned());
 388                 result.push_str(&join_code_block_with_comment_line_separator(&code_block));
 389                 code_block_buffer.clear();
 390                 result.push_str(&comment_line_separator);
 391                 result.push_str(line);
 392             } else {
 393                 code_block_buffer.push_str(line);
 394                 code_block_buffer.push('\n');
 395
 396                 if is_last {
 397                     // There is an code block that is not properly enclosed by backticks.
 398                     // We will leave them untouched.
 399                     result.push_str(&comment_line_separator);
 400                     result.push_str(&join_code_block_with_comment_line_separator(
 401                         &code_block_buffer,
 402                     ));
 403                 }
 404             }
 405
 406             continue;
 407         } else {
 408             inside_code_block = line.starts_with("```rust");
 409
 410             if result == opener {
 411                 let force_leading_whitespace = opener == "/* " && count_newlines(orig) == 0;
 412                 if !has_leading_whitespace && !force_leading_whitespace && result.ends_with(' ') {
 413                     result.pop();
 414                 }
 415                 if line.is_empty() {
 416                     continue;
 417                 }
 418             } else if is_prev_line_multi_line && !line.is_empty() {
 419                 result.push(' ')
 420             } else if is_last && !closer.is_empty() && line.is_empty() {
 421                 result.push_str(&indent_str);
 422             } else {
 423                 result.push_str(&comment_line_separator);
 424                 if !has_leading_whitespace && result.ends_with(' ') {
 425                     result.pop();
 426                 }
 427             }
 428         }
 429
 430         if config.wrap_comments() && line.len() > fmt.shape.width && !has_url(line) {
 431             match rewrite_string(line, &fmt, Some(max_chars)) {
 432                 Some(ref s) => {
 433                     is_prev_line_multi_line = s.contains('\n');
 434                     result.push_str(s);
 435                 }
 436                 None if is_prev_line_multi_line => {
 437                     // We failed to put the current `line` next to the previous `line`.
 438                     // Remove the trailing space, then start rewrite on the next line.
 439                     result.pop();
 440                     result.push_str(&comment_line_separator);
 441                     fmt.shape = Shape::legacy(max_chars, fmt_indent);
 442                     match rewrite_string(line, &fmt, Some(max_chars)) {
 443                         Some(ref s) => {
 444                             is_prev_line_multi_line = s.contains('\n');
 445                             result.push_str(s);
 446                         }
 447                         None => {
 448                             is_prev_line_multi_line = false;
 449                             result.push_str(line);
 450                         }
 451                     }
 452                 }
 453                 None => {
 454                     is_prev_line_multi_line = false;
 455                     result.push_str(line);
 456                 }
 457             }
 458
 459             fmt.shape = if is_prev_line_multi_line {
 460                 // 1 = " "
 461                 let offset = 1 + last_line_width(&result) - line_start.len();
 462                 Shape {
 463                     width: max_chars.checked_sub(offset).unwrap_or(0),
 464                     indent: fmt_indent,
 465                     offset: fmt.shape.offset + offset,
 466                 }
 467             } else {
 468                 Shape::legacy(max_chars, fmt_indent)
 469             };
 470         } else {
 471             if line.is_empty() && result.ends_with(' ') && !is_last {
 472                 // Remove space if this is an empty comment or a doc comment.
 473                 result.pop();
 474             }
 475             result.push_str(line);
 476             fmt.shape = Shape::legacy(max_chars, fmt_indent);
 477             is_prev_line_multi_line = false;
 478         }
 479     }
 480
 481     result.push_str(closer);
 482     if result.ends_with(opener) && opener.ends_with(' ') {
 483         // Trailing space.
 484         result.pop();
 485     }
 486
 487     Some(result)
 488 }
 489
 490 /// Returns true if the given string MAY include URLs or alike.
 491 fn has_url(s: &str) -> bool {
 492     // This function may return false positive, but should get its job done in most cases.
 493     s.contains("https://") || s.contains("http://") || s.contains("ftp://") || s.contains("file://")
 494 }
 495
 496 /// Given the span, rewrite the missing comment inside it if available.
 497 /// Note that the given span must only include comments (or leading/trailing whitespaces).
 498 pub fn rewrite_missing_comment(
 499     span: Span,
 500     shape: Shape,
 501     context: &RewriteContext,
 502 ) -> Option<String> {
 503     let missing_snippet = context.snippet(span);
 504     let trimmed_snippet = missing_snippet.trim();
 505     if !trimmed_snippet.is_empty() {
 506         rewrite_comment(trimmed_snippet, false, shape, context.config)
 507     } else {
 508         Some(String::new())
 509     }
 510 }
 511
 512 /// Recover the missing comments in the specified span, if available.
 513 /// The layout of the comments will be preserved as long as it does not break the code
 514 /// and its total width does not exceed the max width.
 515 pub fn recover_missing_comment_in_span(
 516     span: Span,
 517     shape: Shape,
 518     context: &RewriteContext,
 519     used_width: usize,
 520 ) -> Option<String> {
 521     let missing_comment = rewrite_missing_comment(span, shape, context)?;
 522     if missing_comment.is_empty() {
 523         Some(String::new())
 524     } else {
 525         let missing_snippet = context.snippet(span);
 526         let pos = missing_snippet.chars().position(|c| c == '/').unwrap_or(0);
 527         // 1 = ` `
 528         let total_width = missing_comment.len() + used_width + 1;
 529         let force_new_line_before_comment =
 530             missing_snippet[..pos].contains('\n') || total_width > context.config.max_width();
 531         let sep = if force_new_line_before_comment {
 532             shape.indent.to_string_with_newline(context.config)
 533         } else {
 534             Cow::from(" ")
 535         };
 536         Some(format!("{}{}", sep, missing_comment))
 537     }
 538 }
 539
 540 /// Trim trailing whitespaces unless they consist of two or more whitespaces.
 541 fn trim_right_unless_two_whitespaces(s: &str, is_doc_comment: bool) -> &str {
 542     if is_doc_comment && s.ends_with("  ") {
 543         s
 544     } else {
 545         s.trim_right()
 546     }
 547 }
 548
 549 /// Trims whitespace and aligns to indent, but otherwise does not change comments.
 550 fn light_rewrite_comment(
 551     orig: &str,
 552     offset: Indent,
 553     config: &Config,
 554     is_doc_comment: bool,
 555 ) -> Option<String> {
 556     let lines: Vec<&str> = orig.lines()
 557         .map(|l| {
 558             // This is basically just l.trim(), but in the case that a line starts
 559             // with `*` we want to leave one space before it, so it aligns with the
 560             // `*` in `/*`.
 561             let first_non_whitespace = l.find(|c| !char::is_whitespace(c));
 562             let left_trimmed = if let Some(fnw) = first_non_whitespace {
 563                 if l.as_bytes()[fnw] == b'*' && fnw > 0 {
 564                     &l[fnw - 1..]
 565                 } else {
 566                     &l[fnw..]
 567                 }
 568             } else {
 569                 ""
 570             };
 571             // Preserve markdown's double-space line break syntax in doc comment.
 572             trim_right_unless_two_whitespaces(left_trimmed, is_doc_comment)
 573         })
 574         .collect();
 575     Some(lines.join(&format!("\n{}", offset.to_string(config))))
 576 }
 577
 578 /// Trims comment characters and possibly a single space from the left of a string.
 579 /// Does not trim all whitespace. If a single space is trimmed from the left of the string,
 580 /// this function returns true.
 581 fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle) -> (&'a str, bool) {
 582     if line.starts_with("//! ") || line.starts_with("/// ") || line.starts_with("/*! ")
 583         || line.starts_with("/** ")
 584     {
 585         (&line[4..], true)
 586     } else if let CommentStyle::Custom(opener) = *style {
 587         if line.starts_with(opener) {
 588             (&line[opener.len()..], true)
 589         } else {
 590             (&line[opener.trim_right().len()..], false)
 591         }
 592     } else if line.starts_with("/* ") || line.starts_with("// ") || line.starts_with("//!")
 593         || line.starts_with("///") || line.starts_with("** ")
 594         || line.starts_with("/*!")
 595         || (line.starts_with("/**") && !line.starts_with("/**/"))
 596     {
 597         (&line[3..], line.chars().nth(2).unwrap() == ' ')
 598     } else if line.starts_with("/*") || line.starts_with("* ") || line.starts_with("//")
 599         || line.starts_with("**")
 600     {
 601         (&line[2..], line.chars().nth(1).unwrap() == ' ')
 602     } else if line.starts_with('*') {
 603         (&line[1..], false)
 604     } else {
 605         (line, line.starts_with(' '))
 606     }
 607 }
 608
/// Substring search that ignores text inside comments.
pub trait FindUncommented {
    /// Returns the byte offset of the first occurrence of `pat` that does not
    /// lie inside a comment, or `None` if there is no such occurrence.
    fn find_uncommented(&self, pat: &str) -> Option<usize>;
}
 612
 613 impl FindUncommented for str {
 614     fn find_uncommented(&self, pat: &str) -> Option<usize> {
 615         let mut needle_iter = pat.chars();
 616         for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
 617             match needle_iter.next() {
 618                 None => {
 619                     return Some(i - pat.len());
 620                 }
 621                 Some(c) => match kind {
 622                     FullCodeCharKind::Normal | FullCodeCharKind::InString if b == c => {}
 623                     _ => {
 624                         needle_iter = pat.chars();
 625                     }
 626                 },
 627             }
 628         }
 629
 630         // Handle case where the pattern is a suffix of the search string
 631         match needle_iter.next() {
 632             Some(_) => None,
 633             None => Some(self.len() - pat.len()),
 634         }
 635     }
 636 }
 637
 638 // Returns the first byte position after the first comment. The given string
 639 // is expected to be prefixed by a comment, including delimiters.
 640 // Good: "/* /* inner */ outer */ code();"
 641 // Bad:  "code(); // hello\n world!"
 642 pub fn find_comment_end(s: &str) -> Option<usize> {
 643     let mut iter = CharClasses::new(s.char_indices());
 644     for (kind, (i, _c)) in &mut iter {
 645         if kind == FullCodeCharKind::Normal || kind == FullCodeCharKind::InString {
 646             return Some(i);
 647         }
 648     }
 649
 650     // Handle case where the comment ends at the end of s.
 651     if iter.status == CharClassesStatus::Normal {
 652         Some(s.len())
 653     } else {
 654         None
 655     }
 656 }
 657
 658 /// Returns true if text contains any comment.
 659 pub fn contains_comment(text: &str) -> bool {
 660     CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
 661 }
 662
 663 /// Remove trailing spaces from the specified snippet. We do not remove spaces
 664 /// inside strings or comments.
 665 pub fn remove_trailing_white_spaces(text: &str) -> String {
 666     let mut buffer = String::with_capacity(text.len());
 667     let mut space_buffer = String::with_capacity(128);
 668     for (char_kind, c) in CharClasses::new(text.chars()) {
 669         match c {
 670             '\n' => {
 671                 if char_kind == FullCodeCharKind::InString {
 672                     buffer.push_str(&space_buffer);
 673                 }
 674                 space_buffer.clear();
 675                 buffer.push('\n');
 676             }
 677             _ if c.is_whitespace() => {
 678                 space_buffer.push(c);
 679             }
 680             _ => {
 681                 if !space_buffer.is_empty() {
 682                     buffer.push_str(&space_buffer);
 683                     space_buffer.clear();
 684                 }
 685                 buffer.push(c);
 686             }
 687         }
 688     }
 689     buffer
 690 }
 691
/// Iterator adaptor that pairs every item of the underlying character
/// iterator with a `FullCodeCharKind` classification.
pub struct CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    // Underlying characters, wrapped so that upcoming items can be peeked at.
    base: MultiPeek<T>,
    // Current state of the classifier.
    status: CharClassesStatus,
}
 700
/// An item that carries a `char`, possibly alongside other data such as its
/// index.
pub trait RichChar {
    /// Returns the character carried by this item.
    fn get_char(&self) -> char;
}
 704
// A plain `char` is its own character.
impl RichChar for char {
    fn get_char(&self) -> char {
        *self
    }
}
 710
 711 impl RichChar for (usize, char) {
 712     fn get_char(&self) -> char {
 713         self.1
 714     }
 715 }
 716
// State of the `CharClasses` classifier between two characters.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum CharClassesStatus {
    // Outside of any comment, string literal or char literal.
    Normal,
    // Inside a string literal.
    LitString,
    // Inside a string literal, right after a backslash.
    LitStringEscape,
    // Inside a char literal.
    LitChar,
    // Inside a char literal, right after a backslash.
    LitCharEscape,
    // The u32 is the nesting deepness of the comment
    BlockComment(u32),
    // Status when the '/' has been consumed, but not yet the '*', deepness is
    // the new deepness (after the comment opening).
    BlockCommentOpening(u32),
    // Status when the '*' has been consumed, but not yet the '/', deepness is
    // the new deepness (after the comment closing).
    BlockCommentClosing(u32),
    // Inside a `//` line comment.
    LineComment,
}
 734
/// Distinguish between functional part of code and comments
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum CodeCharKind {
    /// Anything that is not part of a comment.
    Normal,
    /// Part of a comment.
    Comment,
}
 741
/// Distinguish between functional part of code and comments,
/// describing opening and closing of comments for ease when chunking
/// code from tagged characters
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum FullCodeCharKind {
    /// Any character that is neither part of a comment nor tagged as `InString`.
    Normal,
    /// The first character of a comment, there is only one for a comment (always '/')
    StartComment,
    /// Any character inside a comment including the second character of comment
    /// marks ("//", "/*")
    InComment,
    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
    EndComment,
    /// Inside a string.
    InString,
}
 758
 759 impl FullCodeCharKind {
 760     pub fn is_comment(&self) -> bool {
 761         match *self {
 762             FullCodeCharKind::StartComment
 763             | FullCodeCharKind::InComment
 764             | FullCodeCharKind::EndComment => true,
 765             _ => false,
 766         }
 767     }
 768
 769     pub fn is_string(&self) -> bool {
 770         *self == FullCodeCharKind::InString
 771     }
 772
 773     fn to_codecharkind(&self) -> CodeCharKind {
 774         if self.is_comment() {
 775             CodeCharKind::Comment
 776         } else {
 777             CodeCharKind::Normal
 778         }
 779     }
 780 }
 781
 782 impl<T> CharClasses<T>
 783 where
 784     T: Iterator,
 785     T::Item: RichChar,
 786 {
 787     pub fn new(base: T) -> CharClasses<T> {
 788         CharClasses {
 789             base: multipeek(base),
 790             status: CharClassesStatus::Normal,
 791         }
 792     }
 793 }
 794
/// Tags every item of the underlying iterator with a `FullCodeCharKind`.
///
/// The classification is driven by a small state machine stored in
/// `self.status`: each call consumes exactly one item from `self.base`,
/// decides its kind from the current state (sometimes peeking ahead), and
/// records the state to use for the following item.
impl<T> Iterator for CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    type Item = (FullCodeCharKind, T::Item);

    /// Consumes one item and returns it together with its classification.
    /// Returns `None` once the underlying iterator is exhausted.
    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = self.base.next()?;
        let chr = item.get_char();
        // Default classification; arms below override it or return early
        // with an explicit comment kind.
        let mut char_kind = FullCodeCharKind::Normal;
        self.status = match self.status {
            CharClassesStatus::LitString => match chr {
                // Closing quote: `char_kind` is left untouched, so the closing
                // quote is reported as `Normal` (the opening quote, handled in
                // the `Normal` state below, is reported as `InString`).
                '"' => CharClassesStatus::Normal,
                '\\' => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitStringEscape
                }
                _ => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitString
                }
            },
            // The character following a backslash is always part of the
            // string, so an escaped `"` does not terminate it.
            CharClassesStatus::LitStringEscape => {
                char_kind = FullCodeCharKind::InString;
                CharClassesStatus::LitString
            }
            // Characters of a char literal keep the default `Normal` kind;
            // only the state changes.
            CharClassesStatus::LitChar => match chr {
                '\\' => CharClassesStatus::LitCharEscape,
                '\'' => CharClassesStatus::Normal,
                _ => CharClassesStatus::LitChar,
            },
            // Whatever follows a backslash inside a char literal is skipped
            // over, so an escaped `'` does not end the literal.
            CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
            CharClassesStatus::Normal => match chr {
                '"' => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitString
                }
                '\'' => {
                    // HACK: Work around mut borrow.
                    // A quote directly followed by a backslash opens a char
                    // literal with an escape; the state is set directly and we
                    // return early instead of going through the outer
                    // assignment.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\\' => {
                            self.status = CharClassesStatus::LitChar;
                            return Some((char_kind, item));
                        }
                        _ => (),
                    }

                    // NOTE(review): `base` is an itertools multipeek adaptor,
                    // where consecutive `peek()` calls advance the peek cursor,
                    // so this second peek presumably inspects the character
                    // *after* the next one. A quote there means a
                    // one-character literal such as 'a'; otherwise the state
                    // stays `Normal` (presumably the quote started a lifetime).
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\'' => CharClassesStatus::LitChar,
                        _ => CharClassesStatus::Normal,
                    }
                }
                // A slash only starts a comment when followed by `*` or `/`.
                // In both cases the slash itself is reported as `StartComment`.
                '/' => match self.base.peek() {
                    Some(next) if next.get_char() == '*' => {
                        self.status = CharClassesStatus::BlockCommentOpening(1);
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    Some(next) if next.get_char() == '/' => {
                        self.status = CharClassesStatus::LineComment;
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    _ => CharClassesStatus::Normal,
                },
                _ => CharClassesStatus::Normal,
            },
            // Inside a block comment; `deepness` is the current nesting depth.
            CharClassesStatus::BlockComment(deepness) => {
                assert_ne!(deepness, 0);
                // Look at the current and the next character together:
                // `*/` closes one nesting level, `/*` opens another one.
                self.status = match self.base.peek() {
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
                    }
                    Some(next) if next.get_char() == '*' && chr == '/' => {
                        CharClassesStatus::BlockCommentOpening(deepness + 1)
                    }
                    _ => CharClassesStatus::BlockComment(deepness),
                };
                return Some((FullCodeCharKind::InComment, item));
            }
            // The current character is the `*` of a `/*` opener.
            CharClassesStatus::BlockCommentOpening(deepness) => {
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
                return Some((FullCodeCharKind::InComment, item));
            }
            // The current character is the `/` of a `*/` closer. `deepness`
            // is the depth remaining after this closer; zero means the
            // outermost comment just ended.
            CharClassesStatus::BlockCommentClosing(deepness) => {
                assert_eq!(chr, '/');
                if deepness == 0 {
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                } else {
                    self.status = CharClassesStatus::BlockComment(deepness);
                    return Some((FullCodeCharKind::InComment, item));
                }
            }
            // A line comment runs up to and including the next newline, which
            // is reported as `EndComment`.
            CharClassesStatus::LineComment => match chr {
                '\n' => {
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                }
                _ => {
                    self.status = CharClassesStatus::LineComment;
                    return Some((FullCodeCharKind::InComment, item));
                }
            },
        };
        Some((char_kind, item))
    }
}
 903
/// Iterator over functional and commented parts of a string. Every part of the string is
/// either functional code, *one* block comment, or *one* line comment. Whitespace between
/// comments is functional code. Line comments contain their ending newlines.
///
/// Each item is `(kind, start byte offset, text)`.
struct UngroupedCommentCodeSlices<'a> {
    // The full text being split; yielded slices borrow from it.
    slice: &'a str,
    // Classified `(byte offset, char)` pairs of `slice`, peekable so the end
    // of the current part can be detected without consuming the next one.
    iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
}
 911
 912 impl<'a> UngroupedCommentCodeSlices<'a> {
 913     fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
 914         UngroupedCommentCodeSlices {
 915             slice: code,
 916             iter: CharClasses::new(code.char_indices()).peekable(),
 917         }
 918     }
 919 }
 920
 921 impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
 922     type Item = (CodeCharKind, usize, &'a str);
 923
 924     fn next(&mut self) -> Option<Self::Item> {
 925         let (kind, (start_idx, _)) = self.iter.next()?;
 926         match kind {
 927             FullCodeCharKind::Normal | FullCodeCharKind::InString => {
 928                 // Consume all the Normal code
 929                 while let Some(&(char_kind, _)) = self.iter.peek() {
 930                     if char_kind.is_comment() {
 931                         break;
 932                     }
 933                     let _ = self.iter.next();
 934                 }
 935             }
 936             FullCodeCharKind::StartComment => {
 937                 // Consume the whole comment
 938                 while let Some((FullCodeCharKind::InComment, (_, _))) = self.iter.next() {}
 939             }
 940             _ => panic!(),
 941         }
 942         let slice = match self.iter.peek() {
 943             Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
 944             None => &self.slice[start_idx..],
 945         };
 946         Some((
 947             if kind.is_comment() {
 948                 CodeCharKind::Comment
 949             } else {
 950                 CodeCharKind::Normal
 951             },
 952             start_idx,
 953             slice,
 954         ))
 955     }
 956 }
 957
/// Iterator over an alternating sequence of functional and commented parts of
/// a string. The first item is always a, possibly zero length, subslice of
/// functional text. Line style comments contain their ending newlines.
///
/// Each item is `(kind, start byte offset, text)`.
pub struct CommentCodeSlices<'a> {
    // The full text being split; yielded slices borrow from it.
    slice: &'a str,
    // Kind of the slice returned most recently. The next slice always gets
    // the opposite kind.
    last_slice_kind: CodeCharKind,
    // Byte offset into `slice` at which the most recently returned slice
    // ended, i.e. where the next one starts.
    last_slice_end: usize,
}
 966
 967 impl<'a> CommentCodeSlices<'a> {
 968     pub fn new(slice: &'a str) -> CommentCodeSlices<'a> {
 969         CommentCodeSlices {
 970             slice,
 971             last_slice_kind: CodeCharKind::Comment,
 972             last_slice_end: 0,
 973         }
 974     }
 975 }
 976
 977 impl<'a> Iterator for CommentCodeSlices<'a> {
 978     type Item = (CodeCharKind, usize, &'a str);
 979
 980     fn next(&mut self) -> Option<Self::Item> {
 981         if self.last_slice_end == self.slice.len() {
 982             return None;
 983         }
 984
 985         let mut sub_slice_end = self.last_slice_end;
 986         let mut first_whitespace = None;
 987         let subslice = &self.slice[self.last_slice_end..];
 988         let mut iter = CharClasses::new(subslice.char_indices());
 989
 990         for (kind, (i, c)) in &mut iter {
 991             let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal
 992                 && &subslice[..2] == "//"
 993                 && [' ', '\t'].contains(&c);
 994
 995             if is_comment_connector && first_whitespace.is_none() {
 996                 first_whitespace = Some(i);
 997             }
 998
 999             if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
1000                 let last_index = match first_whitespace {
1001                     Some(j) => j,
1002                     None => i,
1003                 };
1004                 sub_slice_end = self.last_slice_end + last_index;
1005                 break;
1006             }
1007
1008             if !is_comment_connector {
1009                 first_whitespace = None;
1010             }
1011         }
1012
1013         if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
1014             // This was the last subslice.
1015             sub_slice_end = match first_whitespace {
1016                 Some(i) => self.last_slice_end + i,
1017                 None => self.slice.len(),
1018             };
1019         }
1020
1021         let kind = match self.last_slice_kind {
1022             CodeCharKind::Comment => CodeCharKind::Normal,
1023             CodeCharKind::Normal => CodeCharKind::Comment,
1024         };
1025         let res = (
1026             kind,
1027             self.last_slice_end,
1028             &self.slice[self.last_slice_end..sub_slice_end],
1029         );
1030         self.last_slice_end = sub_slice_end;
1031         self.last_slice_kind = kind;
1032
1033         Some(res)
1034     }
1035 }
1036
1037 /// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
1038 /// (if it fits in the width/offset, else return None), else return `new`
1039 pub fn recover_comment_removed(
1040     new: String,
1041     span: Span,
1042     context: &RewriteContext,
1043 ) -> Option<String> {
1044     let snippet = context.snippet(span);
1045     if snippet != new && changed_comment_content(snippet, &new) {
1046         // We missed some comments. Keep the original text.
1047         Some(snippet.to_owned())
1048     } else {
1049         Some(new)
1050     }
1051 }
1052
1053 /// Return true if the two strings of code have the same payload of comments.
1054 /// The payload of comments is everything in the string except:
1055 ///     - actual code (not comments)
1056 ///     - comment start/end marks
1057 ///     - whitespace
1058 ///     - '*' at the beginning of lines in block comments
1059 fn changed_comment_content(orig: &str, new: &str) -> bool {
1060     // Cannot write this as a fn since we cannot return types containing closures
1061     let code_comment_content = |code| {
1062         let slices = UngroupedCommentCodeSlices::new(code);
1063         slices
1064             .filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
1065             .flat_map(|(_, _, s)| CommentReducer::new(s))
1066     };
1067     let res = code_comment_content(orig).ne(code_comment_content(new));
1068     debug!(
1069         "comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
1070         res,
1071         orig,
1072         new,
1073         code_comment_content(orig).collect::<String>(),
1074         code_comment_content(new).collect::<String>()
1075     );
1076     res
1077 }
1078
/// Iterator over the 'payload' characters of a comment.
/// It skips whitespace, comment start/end marks, and '*' at the beginning of lines
/// (block comments only).
/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
/// for example).
struct CommentReducer<'a> {
    // Whether the comment is a block comment (`/* ... */`); only block
    // comments have decorative leading '*' to skip.
    is_block: bool,
    // True while no payload character has been seen yet on the current line,
    // i.e. a '*' found now is a line leader rather than content.
    at_start_line: bool,
    // Characters of the comment with its opening (and closing) mark removed.
    iter: std::str::Chars<'a>,
}

impl<'a> CommentReducer<'a> {
    /// Creates a reducer for `comment`, which must start with a comment
    /// opener (`//` or `/*`, including their doc variants).
    ///
    /// # Panics
    ///
    /// Panics if `comment` does not start with a comment opener.
    fn new(comment: &'a str) -> CommentReducer<'a> {
        let is_block = comment.starts_with("/*");
        let comment = remove_comment_header(comment);
        CommentReducer {
            is_block,
            at_start_line: false, // There are no supplementary '*' on the first line
            iter: comment.chars(),
        }
    }
}

impl<'a> Iterator for CommentReducer<'a> {
    type Item = char;

    /// Returns the next non-whitespace payload character, or `None` when the
    /// comment is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut c = self.iter.next()?;
            if self.is_block && self.at_start_line {
                // Skip the indentation (and any blank lines) in front of the
                // line's first visible character.
                while c.is_whitespace() {
                    c = self.iter.next()?;
                }
                // Ignore leading '*'
                if c == '*' {
                    c = self.iter.next()?;
                }
            }
            // Only a newline puts us (back) at the start of a line. Without
            // this reset every '*' after the first newline would be treated
            // as a line leader and silently dropped from the payload.
            self.at_start_line = c == '\n';
            if !c.is_whitespace() {
                return Some(c);
            }
        }
    }
}

/// Strips the opening mark of `comment` (`//`, `///`, `//!`, `/*`, `/**` or
/// `/*!`) and, for block comments, the closing `*/` when it is present.
/// `/**/` is treated as an empty plain block comment, not as a doc comment.
///
/// An unterminated block comment keeps all of its content instead of losing
/// its last two bytes (or panicking when it is too short).
///
/// # Panics
///
/// Panics if `comment` starts with neither `//` nor `/*`.
fn remove_comment_header(comment: &str) -> &str {
    if comment.starts_with("///") || comment.starts_with("//!") {
        return &comment[3..];
    }
    if comment.starts_with("//") {
        return &comment[2..];
    }

    assert!(
        comment.starts_with("/*"),
        "string '{}' is not a comment",
        comment
    );
    let is_doc_block = (comment.starts_with("/**") && !comment.starts_with("/**/"))
        || comment.starts_with("/*!");
    let opener_len = if is_doc_block { 3 } else { 2 };
    // The opener is pure ASCII, so slicing right after it is always on a
    // character boundary.
    let body = &comment[opener_len..];
    if body.ends_with("*/") {
        &body[..body.len() - 2]
    } else {
        body
    }
}
1141
// Unit tests for the character classifier, the code/comment slicers, comment
// rewriting and the "uncommented" search helpers.
#[cfg(test)]
mod test {
    use super::{contains_comment, rewrite_comment, CharClasses, CodeCharKind, CommentCodeSlices,
                FindUncommented, FullCodeCharKind};
    use shape::{Indent, Shape};

    // An empty line comment: both slashes and the terminating newline belong
    // to the comment, the newline after it is normal code again.
    #[test]
    fn char_classes() {
        let mut iter = CharClasses::new("//\n\n".chars());

        assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // A block comment in the middle of code splits it into code / comment /
    // code, each reported with its starting byte offset.
    #[test]
    fn comment_code_slices() {
        let input = "code(); /* test */ 1 + 1";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, "code(); "), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 8, "/* test */"),
            iter.next().unwrap()
        );
        assert_eq!((CodeCharKind::Normal, 18, " 1 + 1"), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // Input starting with a comment still yields an (empty) code slice first;
    // the line comment includes its trailing newline.
    #[test]
    fn comment_code_slices_two() {
        let input = "// comment\n    test();";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, ""), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 0, "// comment\n"),
            iter.next().unwrap()
        );
        assert_eq!(
            (CodeCharKind::Normal, 11, "    test();"),
            iter.next().unwrap()
        );
        assert_eq!(None, iter.next());
    }

    // Consecutive line comments separated only by indentation are merged into
    // a single comment slice.
    #[test]
    fn comment_code_slices_three() {
        let input = "1 // comment\n    // comment2\n\n";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 2, "// comment\n    // comment2\n"),
            iter.next().unwrap()
        );
        assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // Exercises `rewrite_comment` with `wrap_comments` and
    // `normalize_comments` enabled.
    // NOTE(review): the second argument presumably selects block style
    // (`/* */`) output; confirm against `rewrite_comment`.
    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn format_comments() {
        let mut config: ::config::Config = Default::default();
        config.set().wrap_comments(true);
        config.set().normalize_comments(true);

        // A line comment without a space after the opener comes out as a
        // spaced block comment.
        let comment = rewrite_comment(" //test",
                                      true,
                                      Shape::legacy(100, Indent::new(0, 100)),
                                      &config).unwrap();
        assert_eq!("/* test */", comment);

        // A comment wider than the shape is wrapped onto a second line.
        let comment = rewrite_comment("// comment on a",
                                      false,
                                      Shape::legacy(10, Indent::empty()),
                                      &config).unwrap();
        assert_eq!("// comment\n// on a", comment);

        // Continuation lines are re-indented (13 spaces in, 12 spaces out).
        let comment = rewrite_comment("//  A multi line comment\n             // between args.",
                                      false,
                                      Shape::legacy(60, Indent::new(0, 12)),
                                      &config).unwrap();
        assert_eq!("//  A multi line comment\n            // between args.", comment);

        // The comment is still emitted on one line although the shape width
        // (9) is smaller than the result.
        let input = "// comment";
        let expected =
            "/* comment */";
        let comment = rewrite_comment(input,
                                      true,
                                      Shape::legacy(9, Indent::new(0, 69)),
                                      &config).unwrap();
        assert_eq!(expected, comment);

        // Extra padding inside a block comment is trimmed to single spaces.
        let comment = rewrite_comment("/*   trimmed    */",
                                      true,
                                      Shape::legacy(100, Indent::new(0, 100)),
                                      &config).unwrap();
        assert_eq!("/* trimmed */", comment);
    }

    // This is probably intended to be a non-test fn, but it is not used. I'm
    // keeping it around unless it helps us test stuff.
    // Returns `text` with every comment character removed; string literal
    // contents are kept.
    fn uncommented(text: &str) -> String {
        CharClasses::new(text.chars())
            .filter_map(|(s, c)| match s {
                FullCodeCharKind::Normal | FullCodeCharKind::InString => Some(c),
                _ => None,
            })
            .collect()
    }

    // Covers a trailing block comment, nested block comments mixed with line
    // comments, and comment-like text inside a string literal (kept as is).
    #[test]
    fn test_uncommented() {
        assert_eq!(&uncommented("abc/*...*/"), "abc");
        assert_eq!(
            &uncommented("// .... /* \n../* /* *** / */ */a/* // */c\n"),
            "..ac\n"
        );
        assert_eq!(&uncommented("abc \" /* */\" qsdf"), "abc \" /* */\" qsdf");
    }

    // An unterminated block comment still counts as a comment; comment
    // markers inside a string literal do not.
    #[test]
    fn test_contains_comment() {
        assert_eq!(contains_comment("abc"), false);
        assert_eq!(contains_comment("abc // qsdf"), true);
        assert_eq!(contains_comment("abc /* kqsdf"), true);
        assert_eq!(contains_comment("abc \" /* */\" qsdf"), false);
    }

    // `find_uncommented` returns the byte offset of the first occurrence of
    // the needle outside of comments (see the `π` case: the expected offset 9
    // counts bytes, not characters).
    #[test]
    fn test_find_uncommented() {
        fn check(haystack: &str, needle: &str, expected: Option<usize>) {
            assert_eq!(expected, haystack.find_uncommented(needle));
        }

        check("/*/ */test", "test", Some(6));
        check("//test\ntest", "test", Some(7));
        check("/* comment only */", "whatever", None);
        check(
            "/* comment */ some text /* more commentary */ result",
            "result",
            Some(46),
        );
        check("sup // sup", "p", Some(2));
        check("sup", "x", None);
        check(r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9));
        check("/*sup yo? \n sup*/ sup", "p", Some(20));
        // A needle must not match across a comment.
        check("hel/*lohello*/lo", "hello", None);
        check("acb", "ab", None);
        check(",/*A*/ ", ",", Some(0));
        check("abc", "abc", Some(0));
        check("/* abc */", "abc", None);
        check("/**/abc/* */", "abc", Some(4));
        // Comment markers inside a string literal (even an unterminated one)
        // do not hide the needle.
        check("\"/* abc */\"", "abc", Some(4));
        check("\"/* abc", "abc", Some(4));
    }
}