-// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
// Format string literals.
use regex::Regex;
+use unicode_categories::UnicodeCategories;
use unicode_segmentation::UnicodeSegmentation;
-use config::Config;
-use shape::Shape;
-use utils::wrap_str;
+use crate::config::Config;
+use crate::shape::Shape;
+use crate::utils::{unicode_str_width, wrap_str};
const MIN_STRING: usize = 10;
/// Describes the layout of a piece of text.
-pub struct StringFormat<'a> {
+pub(crate) struct StringFormat<'a> {
/// The opening sequence of characters for the piece of text
- pub opener: &'a str,
+ pub(crate) opener: &'a str,
/// The closing sequence of characters for the piece of text
- pub closer: &'a str,
+ pub(crate) closer: &'a str,
/// The opening sequence of characters for a line
- pub line_start: &'a str,
+ pub(crate) line_start: &'a str,
/// The closing sequence of characters for a line
- pub line_end: &'a str,
+ pub(crate) line_end: &'a str,
/// The allocated box to fit the text into
- pub shape: Shape,
+ pub(crate) shape: Shape,
/// Trim trailing whitespaces
- pub trim_end: bool,
- pub config: &'a Config,
+ pub(crate) trim_end: bool,
+ pub(crate) config: &'a Config,
}
impl<'a> StringFormat<'a> {
- pub fn new(shape: Shape, config: &'a Config) -> StringFormat<'a> {
+ pub(crate) fn new(shape: Shape, config: &'a Config) -> StringFormat<'a> {
StringFormat {
opener: "\"",
closer: "\"",
/// indentation into account.
///
/// If we cannot put at least a single character per line, the rewrite won't succeed.
- fn max_chars_with_indent(&self) -> Option<usize> {
+ fn max_width_with_indent(&self) -> Option<usize> {
Some(
self.shape
.width
)
}
- /// Like max_chars_with_indent but the indentation is not subtracted.
+ /// Like max_width_with_indent but the indentation is not subtracted.
/// This allows fitting more graphemes from the string on a line when the
/// snippet state is `SnippetState::EndWithLineFeed`.
- fn max_chars_without_indent(&self) -> Option<usize> {
- Some(self.config.max_width().checked_sub(self.line_end.len())?)
+ fn max_width_without_indent(&self) -> Option<usize> {
+ self.config.max_width().checked_sub(self.line_end.len())
}
}
-pub fn rewrite_string<'a>(
+pub(crate) fn rewrite_string<'a>(
orig: &str,
fmt: &StringFormat<'a>,
newline_max_chars: usize,
) -> Option<String> {
- let max_chars_with_indent = fmt.max_chars_with_indent()?;
- let max_chars_without_indent = fmt.max_chars_without_indent()?;
+ let max_width_with_indent = fmt.max_width_with_indent()?;
+ let max_width_without_indent = fmt.max_width_without_indent()?;
let indent_with_newline = fmt.shape.indent.to_string_with_newline(fmt.config);
let indent_without_newline = fmt.shape.indent.to_string(fmt.config);
// Snip a line at a time from `stripped_str` until it is used up. Push the snippet
// onto result.
- let mut cur_max_chars = max_chars_with_indent;
+ let mut cur_max_width = max_width_with_indent;
let is_bareline_ok = fmt.line_start.is_empty() || is_whitespace(fmt.line_start);
loop {
// All the input starting at cur_start fits on the current line
- if graphemes.len() - cur_start <= cur_max_chars {
+ if graphemes_width(&graphemes[cur_start..]) <= cur_max_width {
for (i, grapheme) in graphemes[cur_start..].iter().enumerate() {
if is_new_line(grapheme) {
// take care of blank lines
result = trim_end_but_line_feed(fmt.trim_end, result);
- result.push_str("\n");
+ result.push('\n');
if !is_bareline_ok && cur_start + i + 1 < graphemes.len() {
result.push_str(&indent_without_newline);
result.push_str(fmt.line_start);
// The input starting at cur_start needs to be broken
match break_string(
- cur_max_chars,
+ cur_max_width,
fmt.trim_end,
fmt.line_end,
&graphemes[cur_start..],
result.push_str(fmt.line_end);
result.push_str(&indent_with_newline);
result.push_str(fmt.line_start);
- cur_max_chars = newline_max_chars;
+ cur_max_width = newline_max_chars;
cur_start += len;
}
SnippetState::EndWithLineFeed(line, len) => {
result.push_str(&line);
if is_bareline_ok {
// the next line can benefit from the full width
- cur_max_chars = max_chars_without_indent;
+ cur_max_width = max_width_without_indent;
} else {
result.push_str(&indent_without_newline);
result.push_str(fmt.line_start);
- cur_max_chars = max_chars_with_indent;
+ cur_max_width = max_width_with_indent;
}
cur_start += len;
}
wrap_str(result, fmt.config.max_width(), fmt.shape)
}
-/// Returns the index to the end of the url if the given string includes an
-/// URL or alike. Otherwise, returns None;
+/// Returns the index to the end of the URL if the split at index of the given string includes a
+/// URL or alike. Otherwise, returns `None`.
fn detect_url(s: &[&str], index: usize) -> Option<usize> {
let start = match s[..=index].iter().rposition(|g| is_whitespace(g)) {
Some(pos) => pos + 1,
None => 0,
};
+ // 8 = minimum length for a string to contain a URL
if s.len() < start + 8 {
return None;
}
- let prefix = s[start..start + 8].concat();
- if prefix.starts_with("https://")
- || prefix.starts_with("http://")
- || prefix.starts_with("ftp://")
- || prefix.starts_with("file://")
+ let split = s[start..].concat();
+ if split.contains("https://")
+ || split.contains("http://")
+ || split.contains("ftp://")
+ || split.contains("file://")
{
match s[index..].iter().position(|g| is_whitespace(g)) {
Some(pos) => Some(index + pos - 1),
is_new_line(g) || !is_whitespace(g)
}
-/// Break the input string at a boundary character around the offset `max_chars`. A boundary
+/// Break the input string at a boundary character around the offset `max_width`. A boundary
/// character is either a punctuation or a whitespace.
-fn break_string(max_chars: usize, trim_end: bool, line_end: &str, input: &[&str]) -> SnippetState {
+/// FIXME(issue#3281): We must follow the UAX#14 line breaking algorithm instead of this.
+fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str]) -> SnippetState {
let break_at = |index /* grapheme at index is included */| {
// Take in any whitespaces to the left/right of `input[index]` while
// preserving line feeds
}
};
+ // Find the first index `x` at which the Unicode width of `input[0..=x]` becomes greater
+ // than `max_width` (or the last index of `input` if the width never exceeds it).
+ let max_width_index_in_input = {
+ let mut cur_width = 0;
+ let mut cur_index = 0;
+ for (i, grapheme) in input.iter().enumerate() {
+ cur_width += unicode_str_width(grapheme);
+ cur_index = i;
+ if cur_width > max_width {
+ break;
+ }
+ }
+ cur_index
+ };
+ if max_width_index_in_input == 0 {
+ return SnippetState::EndOfInput(input.concat());
+ }
+
// Find the position in input for breaking the string
if line_end.is_empty()
&& trim_end
- && !is_whitespace(input[max_chars - 1])
- && is_whitespace(input[max_chars])
+ && !is_whitespace(input[max_width_index_in_input - 1])
+ && is_whitespace(input[max_width_index_in_input])
{
// At a breaking point already
// The line won't invalidate the rewriting because:
// - no extra space needed for the line_end character
// - extra whitespaces to the right can be trimmed
- return break_at(max_chars - 1);
+ return break_at(max_width_index_in_input - 1);
}
- if let Some(url_index_end) = detect_url(input, max_chars) {
+ if let Some(url_index_end) = detect_url(input, max_width_index_in_input) {
let index_plus_ws = url_index_end
+ input[url_index_end..]
.iter()
return if trim_end {
SnippetState::LineEnd(input[..=url_index_end].concat(), index_plus_ws + 1)
} else {
- return SnippetState::LineEnd(input[..=index_plus_ws].concat(), index_plus_ws + 1);
+ SnippetState::LineEnd(input[..=index_plus_ws].concat(), index_plus_ws + 1)
};
}
- match input[0..max_chars]
+
+ match input[0..max_width_index_in_input]
.iter()
.rposition(|grapheme| is_whitespace(grapheme))
{
// Found a whitespace and what is on its left side is big enough.
Some(index) if index >= MIN_STRING => break_at(index),
// No whitespace found, try looking for a punctuation instead
- _ => match input[0..max_chars]
- .iter()
- .rposition(|grapheme| is_punctuation(grapheme))
+ _ => match (0..max_width_index_in_input)
+ .rev()
+ .skip_while(|pos| !is_valid_linebreak(input, *pos))
+ .next()
{
// Found a punctuation and what is on its left side is big enough.
Some(index) if index >= MIN_STRING => break_at(index),
// Either no boundary character was found to the left of
// `input[max_width_index_in_input]`, or the line got too small. We try searching for a
// boundary character to the right.
- _ => match input[max_chars..]
- .iter()
- .position(|grapheme| is_whitespace(grapheme) || is_punctuation(grapheme))
+ _ => match (max_width_index_in_input..input.len())
+ .skip_while(|pos| !is_valid_linebreak(input, *pos))
+ .next()
{
// A boundary was found after the line limit
- Some(index) => break_at(max_chars + index),
+ Some(index) => break_at(index),
// No boundary to the right, the input cannot be broken
None => SnippetState::EndOfInput(input.concat()),
},
}
}
+fn is_valid_linebreak(input: &[&str], pos: usize) -> bool {
+ let is_whitespace = is_whitespace(input[pos]);
+ if is_whitespace {
+ return true;
+ }
+ let is_punctuation = is_punctuation(input[pos]);
+ if is_punctuation && !is_part_of_type(input, pos) {
+ return true;
+ }
+ false
+}
+
+fn is_part_of_type(input: &[&str], pos: usize) -> bool {
+ input.get(pos..=pos + 1) == Some(&[":", ":"])
+ || input.get(pos.saturating_sub(1)..=pos) == Some(&[":", ":"])
+}
+
/// Returns `true` when `grapheme` begins with a line feed, written either as a
/// bare `\n` or as a `\r\n` pair.
fn is_new_line(grapheme: &str) -> bool {
    // A lone `\r` is not matched: only `\n` or `\r` immediately followed by `\n`.
    matches!(grapheme.as_bytes(), [b'\n', ..] | [b'\r', b'\n', ..])
}
fn is_whitespace(grapheme: &str) -> bool {
- grapheme.chars().all(|c| c.is_whitespace())
+ grapheme.chars().all(char::is_whitespace)
}
fn is_punctuation(grapheme: &str) -> bool {
- match grapheme.as_bytes()[0] {
- b':' | b',' | b';' | b'.' => true,
- _ => false,
- }
+ grapheme
+ .chars()
+ .all(UnicodeCategories::is_punctuation_other)
+}
+
+fn graphemes_width(graphemes: &[&str]) -> usize {
+ graphemes.iter().map(|s| unicode_str_width(s)).sum()
}
#[cfg(test)]
mod test {
use super::{break_string, detect_url, rewrite_string, SnippetState, StringFormat};
- use config::Config;
- use shape::{Indent, Shape};
+ use crate::config::Config;
+ use crate::shape::{Indent, Shape};
use unicode_segmentation::UnicodeSegmentation;
#[test]
rewrite_string("eq_", &fmt, 2);
}
+ #[test]
+ fn line_break_at_valid_points_test() {
+ let string = "[TheName](Dont::break::my::type::That::would::be::very::nice) break here";
+ let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::<Vec<&str>>();
+ assert_eq!(
+ break_string(20, false, "", &graphemes[..]),
+ SnippetState::LineEnd(
+ "[TheName](Dont::break::my::type::That::would::be::very::nice) ".to_string(),
+ 62
+ )
+ );
+ }
+
#[test]
fn should_break_on_whitespace() {
let string = "Placerat felis. Mauris porta ante sagittis purus.";