X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=src%2Fstring.rs;h=d45f15a1c9bab59cb85710d453c096347cd6a748;hb=6fb188bd43840f4a99c6a4b4cdbdb21ccf3304e7;hp=fd7ac89013a32cba04c94a37bd7cbd2a1bc0df62;hpb=9038da6df0a11a55255a7ea562d91d1e93679252;p=rust.git diff --git a/src/string.rs b/src/string.rs index fd7ac89013a..d45f15a1c9b 100644 --- a/src/string.rs +++ b/src/string.rs @@ -19,12 +19,19 @@ const MIN_STRING: usize = 10; +/// Describes the layout of a piece of text. pub struct StringFormat<'a> { + /// The opening sequence of characters for the piece of text pub opener: &'a str, + /// The closing sequence of characters for the piece of text pub closer: &'a str, + /// The opening sequence of characters for a line pub line_start: &'a str, + /// The closing sequence of characters for a line pub line_end: &'a str, + /// The allocated box to fit the text into pub shape: Shape, + /// Trim trailing whitespaces pub trim_end: bool, pub config: &'a Config, } @@ -57,16 +64,21 @@ fn max_chars_with_indent(&self) -> Option { /// Like max_chars_with_indent but the indentation is not substracted. /// This allows to fit more graphemes from the string on a line when - /// SnippetState::Overflow. + /// SnippetState::EndWithLineFeed. fn max_chars_without_indent(&self) -> Option { Some(self.config.max_width().checked_sub(self.line_end.len())?) } } -pub fn rewrite_string<'a>(orig: &str, fmt: &StringFormat<'a>) -> Option { +pub fn rewrite_string<'a>( + orig: &str, + fmt: &StringFormat<'a>, + newline_max_chars: usize, +) -> Option { let max_chars_with_indent = fmt.max_chars_with_indent()?; let max_chars_without_indent = fmt.max_chars_without_indent()?; - let indent = fmt.shape.indent.to_string_with_newline(fmt.config); + let indent_with_newline = fmt.shape.indent.to_string_with_newline(fmt.config); + let indent_without_newline = fmt.shape.indent.to_string(fmt.config); // Strip line breaks. // With this regex applied, all remaining whitespaces are significant @@ -88,26 +100,55 @@ pub fn rewrite_string<'a>(orig: &str, fmt: &StringFormat<'a>) -> Option // Snip a line at a time from `stripped_str` until it is used up. Push the snippet // onto result. let mut cur_max_chars = max_chars_with_indent; + let is_bareline_ok = fmt.line_start.is_empty() || is_whitespace(fmt.line_start); loop { // All the input starting at cur_start fits on the current line if graphemes.len() - cur_start <= cur_max_chars { - result.push_str(&graphemes[cur_start..].join("")); + for (i, grapheme) in graphemes[cur_start..].iter().enumerate() { + if is_line_feed(grapheme) { + // take care of blank lines + result = trim_right_but_line_feed(fmt.trim_end, result); + result.push_str("\n"); + if !is_bareline_ok && cur_start + i + 1 < graphemes.len() { + result.push_str(&indent_without_newline); + result.push_str(fmt.line_start); + } + } else { + result.push_str(grapheme); + } + } + result = trim_right_but_line_feed(fmt.trim_end, result); break; } // The input starting at cur_start needs to be broken - match break_string(cur_max_chars, fmt.trim_end, &graphemes[cur_start..]) { + match break_string( + cur_max_chars, + fmt.trim_end, + fmt.line_end, + &graphemes[cur_start..], + ) { SnippetState::LineEnd(line, len) => { result.push_str(&line); result.push_str(fmt.line_end); - result.push_str(&indent); + result.push_str(&indent_with_newline); result.push_str(fmt.line_start); - cur_max_chars = max_chars_with_indent; + cur_max_chars = newline_max_chars; cur_start += len; } - SnippetState::Overflow(line, len) => { + SnippetState::EndWithLineFeed(line, len) => { + if line == "\n" && fmt.trim_end { + result = result.trim_right().to_string(); + } result.push_str(&line); - cur_max_chars = max_chars_without_indent; + if is_bareline_ok { + // the next line can benefit from the full width + cur_max_chars = max_chars_without_indent; + } else { + result.push_str(&indent_without_newline); + result.push_str(fmt.line_start); + cur_max_chars = max_chars_with_indent; + } cur_start += len; } SnippetState::EndOfInput(line) => { @@ -121,6 +162,43 @@ pub fn rewrite_string<'a>(orig: &str, fmt: &StringFormat<'a>) -> Option wrap_str(result, fmt.config.max_width(), fmt.shape) } +/// Returns the index to the end of the url if the given string includes an +/// URL or alike. Otherwise, returns None; +fn detect_url(s: &[&str], index: usize) -> Option { + let start = match s[..=index].iter().rposition(|g| is_whitespace(g)) { + Some(pos) => pos + 1, + None => 0, + }; + if s.len() < start + 8 { + return None; + } + let prefix = s[start..start + 8].concat(); + if prefix.starts_with("https://") + || prefix.starts_with("http://") + || prefix.starts_with("ftp://") + || prefix.starts_with("file://") + { + match s[index..].iter().position(|g| is_whitespace(g)) { + Some(pos) => Some(index + pos - 1), + None => Some(s.len() - 1), + } + } else { + None + } +} + +/// Trims whitespaces to the right except for the line feed character. +fn trim_right_but_line_feed(trim_end: bool, result: String) -> String { + let whitespace_except_line_feed = |c: char| c.is_whitespace() && c != '\n'; + if trim_end && result.ends_with(whitespace_except_line_feed) { + result + .trim_right_matches(whitespace_except_line_feed) + .to_string() + } else { + result + } +} + /// Result of breaking a string so it fits in a line and the state it ended in. /// The state informs about what to do with the snippet and how to continue the breaking process. #[derive(Debug, PartialEq)] @@ -129,55 +207,95 @@ enum SnippetState { EndOfInput(String), /// The input could be broken and the returned snippet should be ended with a /// `[StringFormat::line_end]`. The next snippet needs to be indented. + /// + /// The returned string is the line to print out and the number is the length that got read in + /// the text being rewritten. That length may be greater than the returned string if trailing + /// whitespaces got trimmed. LineEnd(String, usize), - /// The input could be broken but the returned snippet should not be ended with a - /// `[StringFormat::line_end]` because the whitespace is significant. Therefore, the next - /// snippet should not be indented. - Overflow(String, usize), + /// The input could be broken but a newline is present that cannot be trimmed. The next snippet + /// to be rewritten *could* use more width than what is specified by the given shape. For + /// example with a multiline string, the next snippet does not need to be indented, allowing + /// more characters to be fit within a line. + /// + /// The returned string is the line to print out and the number is the length that got read in + /// the text being rewritten. + EndWithLineFeed(String, usize), +} + +fn not_whitespace_except_line_feed(g: &str) -> bool { + is_line_feed(g) || !is_whitespace(g) } /// Break the input string at a boundary character around the offset `max_chars`. A boundary /// character is either a punctuation or a whitespace. -fn break_string(max_chars: usize, trim_end: bool, input: &[&str]) -> SnippetState { +fn break_string(max_chars: usize, trim_end: bool, line_end: &str, input: &[&str]) -> SnippetState { let break_at = |index /* grapheme at index is included */| { - // Take in any whitespaces to the left/right of `input[index]` and - // check if there is a line feed, in which case whitespaces needs to be kept. - let mut index_minus_ws = index; - for (i, grapheme) in input[0..=index].iter().enumerate().rev() { - if !trim_end && is_line_feed(grapheme) { - return SnippetState::Overflow(input[0..=i].join("").to_string(), i + 1); - } else if !is_whitespace(grapheme) { - index_minus_ws = i; + // Take in any whitespaces to the left/right of `input[index]` while + // preserving line feeds + let index_minus_ws = input[0..=index] + .iter() + .rposition(|grapheme| not_whitespace_except_line_feed(grapheme)) + .unwrap_or(index); + // Take into account newlines occuring in input[0..=index], i.e., the possible next new + // line. If there is one, then text after it could be rewritten in a way that the available + // space is fully used. + for (i, grapheme) in input[0..=index].iter().enumerate() { + if is_line_feed(grapheme) { + if i <= index_minus_ws { + let mut line = &input[0..i].concat()[..]; + if trim_end { + line = line.trim_right(); + } + return SnippetState::EndWithLineFeed(format!("{}\n", line), i + 1); + } break; } } + let mut index_plus_ws = index; for (i, grapheme) in input[index + 1..].iter().enumerate() { if !trim_end && is_line_feed(grapheme) { - return SnippetState::Overflow( - input[0..=index + 1 + i].join("").to_string(), + return SnippetState::EndWithLineFeed( + input[0..=index + 1 + i].concat(), index + 2 + i, ); - } else if !is_whitespace(grapheme) { + } else if not_whitespace_except_line_feed(grapheme) { index_plus_ws = index + i; break; } } if trim_end { - SnippetState::LineEnd( - input[0..=index_minus_ws].join("").to_string(), - index_plus_ws + 1, - ) + SnippetState::LineEnd(input[0..=index_minus_ws].concat(), index_plus_ws + 1) } else { - SnippetState::LineEnd( - input[0..=index_plus_ws].join("").to_string(), - index_plus_ws + 1, - ) + SnippetState::LineEnd(input[0..=index_plus_ws].concat(), index_plus_ws + 1) } }; // Find the position in input for breaking the string + if line_end.is_empty() + && trim_end + && !is_whitespace(input[max_chars - 1]) + && is_whitespace(input[max_chars]) + { + // At a breaking point already + // The line won't invalidate the rewriting because: + // - no extra space needed for the line_end character + // - extra whitespaces to the right can be trimmed + return break_at(max_chars - 1); + } + if let Some(url_index_end) = detect_url(input, max_chars) { + let index_plus_ws = url_index_end + input[url_index_end..] + .iter() + .skip(1) + .position(|grapheme| not_whitespace_except_line_feed(grapheme)) + .unwrap_or(0); + return if trim_end { + SnippetState::LineEnd(input[..=url_index_end].concat(), index_plus_ws + 1) + } else { + return SnippetState::LineEnd(input[..=index_plus_ws].concat(), index_plus_ws + 1); + }; + } match input[0..max_chars] .iter() .rposition(|grapheme| is_whitespace(grapheme)) @@ -200,7 +318,7 @@ fn break_string(max_chars: usize, trim_end: bool, input: &[&str]) -> SnippetStat // A boundary was found after the line limit Some(index) => break_at(max_chars + index), // No boundary to the right, the input cannot be broken - None => SnippetState::EndOfInput(input.join("").to_string()), + None => SnippetState::EndOfInput(input.concat()), }, }, } @@ -223,7 +341,8 @@ fn is_punctuation(grapheme: &str) -> bool { #[cfg(test)] mod test { - use super::{break_string, rewrite_string, SnippetState, StringFormat}; + use super::{break_string, detect_url, rewrite_string, SnippetState, StringFormat}; + use config::Config; use shape::{Indent, Shape}; use unicode_segmentation::UnicodeSegmentation; @@ -231,7 +350,7 @@ mod test { fn issue343() { let config = Default::default(); let fmt = StringFormat::new(Shape::legacy(2, Indent::empty()), &config); - rewrite_string("eq_", &fmt); + rewrite_string("eq_", &fmt, 2); } #[test] @@ -239,11 +358,11 @@ fn should_break_on_whitespace() { let string = "Placerat felis. Mauris porta ante sagittis purus."; let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); assert_eq!( - break_string(20, false, &graphemes[..]), + break_string(20, false, "", &graphemes[..]), SnippetState::LineEnd("Placerat felis. ".to_string(), 16) ); assert_eq!( - break_string(20, true, &graphemes[..]), + break_string(20, true, "", &graphemes[..]), SnippetState::LineEnd("Placerat felis.".to_string(), 16) ); } @@ -253,7 +372,7 @@ fn should_break_on_punctuation() { let string = "Placerat_felis._Mauris_porta_ante_sagittis_purus."; let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); assert_eq!( - break_string(20, false, &graphemes[..]), + break_string(20, false, "", &graphemes[..]), SnippetState::LineEnd("Placerat_felis.".to_string(), 15) ); } @@ -263,11 +382,11 @@ fn should_break_forward() { let string = "Venenatis_tellus_vel_tellus. Aliquam aliquam dolor at justo."; let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); assert_eq!( - break_string(20, false, &graphemes[..]), + break_string(20, false, "", &graphemes[..]), SnippetState::LineEnd("Venenatis_tellus_vel_tellus. ".to_string(), 29) ); assert_eq!( - break_string(20, true, &graphemes[..]), + break_string(20, true, "", &graphemes[..]), SnippetState::LineEnd("Venenatis_tellus_vel_tellus.".to_string(), 29) ); } @@ -277,7 +396,7 @@ fn nothing_to_break() { let string = "Venenatis_tellus_vel_tellus"; let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); assert_eq!( - break_string(20, false, &graphemes[..]), + break_string(20, false, "", &graphemes[..]), SnippetState::EndOfInput("Venenatis_tellus_vel_tellus".to_string()) ); } @@ -287,21 +406,21 @@ fn significant_whitespaces() { let string = "Neque in sem. \n Pellentesque tellus augue."; let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); assert_eq!( - break_string(15, false, &graphemes[..]), - SnippetState::Overflow("Neque in sem. \n".to_string(), 20) + break_string(15, false, "", &graphemes[..]), + SnippetState::EndWithLineFeed("Neque in sem. \n".to_string(), 20) ); assert_eq!( - break_string(25, false, &graphemes[..]), - SnippetState::Overflow("Neque in sem. \n".to_string(), 20) + break_string(25, false, "", &graphemes[..]), + SnippetState::EndWithLineFeed("Neque in sem. \n".to_string(), 20) ); - // if `StringFormat::line_end` is true, then the line feed does not matter anymore + assert_eq!( - break_string(15, true, &graphemes[..]), - SnippetState::LineEnd("Neque in sem.".to_string(), 26) + break_string(15, true, "", &graphemes[..]), + SnippetState::LineEnd("Neque in sem.".to_string(), 19) ); assert_eq!( - break_string(25, true, &graphemes[..]), - SnippetState::LineEnd("Neque in sem.".to_string(), 26) + break_string(25, true, "", &graphemes[..]), + SnippetState::EndWithLineFeed("Neque in sem.\n".to_string(), 20) ); } @@ -310,12 +429,250 @@ fn big_whitespace() { let string = "Neque in sem. Pellentesque tellus augue."; let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); assert_eq!( - break_string(20, false, &graphemes[..]), + break_string(20, false, "", &graphemes[..]), SnippetState::LineEnd("Neque in sem. ".to_string(), 25) ); assert_eq!( - break_string(20, true, &graphemes[..]), + break_string(20, true, "", &graphemes[..]), SnippetState::LineEnd("Neque in sem.".to_string(), 25) ); } + + #[test] + fn newline_in_candidate_line() { + let string = "Nulla\nconsequat erat at massa. Vivamus id mi."; + + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!( + break_string(25, false, "", &graphemes[..]), + SnippetState::EndWithLineFeed("Nulla\n".to_string(), 6) + ); + assert_eq!( + break_string(25, true, "", &graphemes[..]), + SnippetState::EndWithLineFeed("Nulla\n".to_string(), 6) + ); + + let mut config: Config = Default::default(); + config.set().max_width(27); + let fmt = StringFormat::new(Shape::legacy(25, Indent::empty()), &config); + let rewritten_string = rewrite_string(string, &fmt, 27); + assert_eq!( + rewritten_string, + Some("\"Nulla\nconsequat erat at massa. \\\n Vivamus id mi.\"".to_string()) + ); + } + + #[test] + fn last_line_fit_with_trailing_whitespaces() { + let string = "Vivamus id mi. "; + let config: Config = Default::default(); + let mut fmt = StringFormat::new(Shape::legacy(25, Indent::empty()), &config); + + fmt.trim_end = true; + let rewritten_string = rewrite_string(string, &fmt, 25); + assert_eq!(rewritten_string, Some("\"Vivamus id mi.\"".to_string())); + + fmt.trim_end = false; // default value of trim_end + let rewritten_string = rewrite_string(string, &fmt, 25); + assert_eq!(rewritten_string, Some("\"Vivamus id mi. \"".to_string())); + } + + #[test] + fn last_line_fit_with_newline() { + let string = "Vivamus id mi.\nVivamus id mi."; + let config: Config = Default::default(); + let fmt = StringFormat { + opener: "", + closer: "", + line_start: "// ", + line_end: "", + shape: Shape::legacy(100, Indent::from_width(&config, 4)), + trim_end: true, + config: &config, + }; + + let rewritten_string = rewrite_string(string, &fmt, 100); + assert_eq!( + rewritten_string, + Some("Vivamus id mi.\n // Vivamus id mi.".to_string()) + ); + } + + #[test] + fn overflow_in_non_string_content() { + let comment = "Aenean metus.\nVestibulum ac lacus. Vivamus porttitor"; + let config: Config = Default::default(); + let fmt = StringFormat { + opener: "", + closer: "", + line_start: "// ", + line_end: "", + shape: Shape::legacy(30, Indent::from_width(&config, 8)), + trim_end: true, + config: &config, + }; + + assert_eq!( + rewrite_string(comment, &fmt, 30), + Some( + "Aenean metus.\n // Vestibulum ac lacus. Vivamus\n // porttitor" + .to_string() + ) + ); + } + + #[test] + fn overflow_in_non_string_content_with_line_end() { + let comment = "Aenean metus.\nVestibulum ac lacus. Vivamus porttitor"; + let config: Config = Default::default(); + let fmt = StringFormat { + opener: "", + closer: "", + line_start: "// ", + line_end: "@", + shape: Shape::legacy(30, Indent::from_width(&config, 8)), + trim_end: true, + config: &config, + }; + + assert_eq!( + rewrite_string(comment, &fmt, 30), + Some( + "Aenean metus.\n // Vestibulum ac lacus. Vivamus@\n // porttitor" + .to_string() + ) + ); + } + + #[test] + fn blank_line_with_non_empty_line_start() { + let config: Config = Default::default(); + let mut fmt = StringFormat { + opener: "", + closer: "", + line_start: "// ", + line_end: "", + shape: Shape::legacy(30, Indent::from_width(&config, 4)), + trim_end: true, + config: &config, + }; + + let comment = "Aenean metus. Vestibulum\n\nac lacus. Vivamus porttitor"; + assert_eq!( + rewrite_string(comment, &fmt, 30), + Some( + "Aenean metus. Vestibulum\n //\n // ac lacus. Vivamus porttitor".to_string() + ) + ); + + fmt.shape = Shape::legacy(15, Indent::from_width(&config, 4)); + let comment = "Aenean\n\nmetus. Vestibulum ac lacus. Vivamus porttitor"; + assert_eq!( + rewrite_string(comment, &fmt, 15), + Some( + r#"Aenean + // + // metus. Vestibulum + // ac lacus. Vivamus + // porttitor"# + .to_string() + ) + ); + } + + #[test] + fn retain_blank_lines() { + let config: Config = Default::default(); + let fmt = StringFormat { + opener: "", + closer: "", + line_start: "// ", + line_end: "", + shape: Shape::legacy(20, Indent::from_width(&config, 4)), + trim_end: true, + config: &config, + }; + + let comment = "Aenean\n\nmetus. Vestibulum ac lacus.\n\n"; + assert_eq!( + rewrite_string(comment, &fmt, 20), + Some( + "Aenean\n //\n // metus. Vestibulum ac\n // lacus.\n //\n".to_string() + ) + ); + + let comment = "Aenean\n\nmetus. Vestibulum ac lacus.\n"; + assert_eq!( + rewrite_string(comment, &fmt, 20), + Some("Aenean\n //\n // metus. Vestibulum ac\n // lacus.\n".to_string()) + ); + + let comment = "Aenean\n \nmetus. Vestibulum ac lacus."; + assert_eq!( + rewrite_string(comment, &fmt, 20), + Some("Aenean\n //\n // metus. Vestibulum ac\n // lacus.".to_string()) + ); + } + + #[test] + fn boundary_on_edge() { + let config: Config = Default::default(); + let mut fmt = StringFormat { + opener: "", + closer: "", + line_start: "// ", + line_end: "", + shape: Shape::legacy(13, Indent::from_width(&config, 4)), + trim_end: true, + config: &config, + }; + + let comment = "Aenean metus. Vestibulum ac lacus."; + assert_eq!( + rewrite_string(comment, &fmt, 13), + Some("Aenean metus.\n // Vestibulum ac\n // lacus.".to_string()) + ); + + fmt.trim_end = false; + let comment = "Vestibulum ac lacus."; + assert_eq!( + rewrite_string(comment, &fmt, 13), + Some("Vestibulum \n // ac lacus.".to_string()) + ); + + fmt.trim_end = true; + fmt.line_end = "\\"; + let comment = "Vestibulum ac lacus."; + assert_eq!( + rewrite_string(comment, &fmt, 13), + Some("Vestibulum\\\n // ac lacus.".to_string()) + ); + } + + #[test] + fn detect_urls() { + let string = "aaa http://example.org something"; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!(detect_url(&graphemes, 8), Some(21)); + + let string = "https://example.org something"; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!(detect_url(&graphemes, 0), Some(18)); + + let string = "aaa ftp://example.org something"; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!(detect_url(&graphemes, 8), Some(20)); + + let string = "aaa file://example.org something"; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!(detect_url(&graphemes, 8), Some(21)); + + let string = "aaa http not an url"; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!(detect_url(&graphemes, 6), None); + + let string = "aaa file://example.org"; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!(detect_url(&graphemes, 8), Some(21)); + } }