git.lizzy.rs Git - rust.git/commitdiff
Only escape extended grapheme characters in the first position
author varkor <github@varkor.com>
Thu, 17 May 2018 09:45:34 +0000 (10:45 +0100)
committer varkor <github@varkor.com>
Mon, 21 May 2018 17:57:54 +0000 (18:57 +0100)
src/liballoc/str.rs
src/liballoc/tests/str.rs
src/libcore/char/methods.rs
src/libcore/tests/char.rs

index c10c0a69433920ba0ff45f6054cab8e4e5e27603..8af14d3c698d2573d60d5be3ec0ef39586ed23cf 100644 (file)
@@ -372,12 +372,15 @@ pub fn to_uppercase(&self) -> String {
 
     /// Escapes each char in `s` with [`char::escape_debug`].
     ///
+    /// Note: a grapheme-extending codepoint (such as a combining mark) is
+    /// escaped only when it is the first character of the string.
+    ///
     /// [`char::escape_debug`]: primitive.char.html#method.escape_debug
     #[unstable(feature = "str_escape",
                reason = "return type may change to be an iterator",
                issue = "27791")]
     pub fn escape_debug(&self) -> String {
-        self.chars().flat_map(|c| c.escape_debug()).collect()
+        self.chars().enumerate().flat_map(|(i, c)| c.escape_debug_ext(i == 0)).collect()
     }
 
     /// Escapes each char in `s` with [`char::escape_default`].
index 2f38c8b3ae21e9ae0ac6e8523b90cffd98d35792..84c97abcbc28f850914634cda1a5051edd96473e 100644 (file)
@@ -999,7 +999,7 @@ fn test_escape_debug() {
     assert_eq!("\u{10000}\u{10ffff}".escape_debug(), "\u{10000}\\u{10ffff}");
     assert_eq!("ab\u{200b}".escape_debug(), "ab\\u{200b}");
     assert_eq!("\u{10d4ea}\r".escape_debug(), "\\u{10d4ea}\\r");
-    assert_eq!("\u{301}a\u{301}bé\u{e000}".escape_debug(), "\\u{301}a\\u{301}bé\\u{e000}");
+    assert_eq!("\u{301}a\u{301}bé\u{e000}".escape_debug(), "\\u{301}a\u{301}bé\\u{e000}");
 }
 
 #[test]
index bf7772492e5bc8c5b6dcdb23cd8a24180ca135f1..f6b201fe06dea429e44b9b6e8006bb5b500db179 100644 (file)
@@ -187,6 +187,27 @@ pub fn escape_unicode(self) -> EscapeUnicode {
         }
     }
 
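+    /// An extended version of `escape_debug` in which escaping of
+    /// grapheme-extending codepoints is optional: they are forced into
+    /// `\u{...}` form only when `escape_grapheme_extended` is `true`.
+    /// This lets string formatting escape characters such as nonspacing marks
+    /// when they're at the start of a string, without forcing the escape
+    /// everywhere else.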
+    #[doc(hidden)]
+    #[unstable(feature = "str_internals", issue = "0")]
+    #[inline]
+    pub fn escape_debug_ext(self, escape_grapheme_extended: bool) -> EscapeDebug {
+        let init_state = match self {
+            '\t' => EscapeDefaultState::Backslash('t'),
+            '\r' => EscapeDefaultState::Backslash('r'),
+            '\n' => EscapeDefaultState::Backslash('n'),
+            '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
+            _ if escape_grapheme_extended && self.is_grapheme_extended() => {
+                EscapeDefaultState::Unicode(self.escape_unicode())
+            }
+            _ if is_printable(self) => EscapeDefaultState::Char(self),
+            _ => EscapeDefaultState::Unicode(self.escape_unicode()),
+        };
+        EscapeDebug(EscapeDefault { state: init_state })
+    }
+
     /// Returns an iterator that yields the literal escape code of a character
     /// as `char`s.
     ///
@@ -224,18 +245,7 @@ pub fn escape_unicode(self) -> EscapeUnicode {
     #[stable(feature = "char_escape_debug", since = "1.20.0")]
     #[inline]
     pub fn escape_debug(self) -> EscapeDebug {
-        let init_state = match self {
-            '\t' => EscapeDefaultState::Backslash('t'),
-            '\r' => EscapeDefaultState::Backslash('r'),
-            '\n' => EscapeDefaultState::Backslash('n'),
-            '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
-            _ if self.is_grapheme_extended() => {
-                EscapeDefaultState::Unicode(self.escape_unicode())
-            }
-            _ if is_printable(self) => EscapeDefaultState::Char(self),
-            _ => EscapeDefaultState::Unicode(self.escape_unicode()),
-        };
-        EscapeDebug(EscapeDefault { state: init_state })
+        self.escape_debug_ext(true)
     }
 
     /// Returns an iterator that yields the literal escape code of a character
index d19e3b527696f93f942f248e65c69dde347db611..d2a9ed75be658abd1de4a8b2920700ede283ecb6 100644 (file)
@@ -181,7 +181,7 @@ fn string(c: char) -> String {
     assert_eq!(string('\u{ff}'), "\u{ff}");
     assert_eq!(string('\u{11b}'), "\u{11b}");
     assert_eq!(string('\u{1d4b6}'), "\u{1d4b6}");
-    assert_eq!(string('\u{301}'), "'\\u{301}'");     // combining character
+    assert_eq!(string('\u{301}'), "\\u{301}");     // combining character
     assert_eq!(string('\u{200b}'),"\\u{200b}");      // zero width space
     assert_eq!(string('\u{e000}'), "\\u{e000}");     // private use 1
     assert_eq!(string('\u{100000}'), "\\u{100000}"); // private use 2