1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 use super::strcursor::StrCursor as Cur;
14 /// Represents a single `printf`-style substitution.
15 #[derive(Clone, PartialEq, Debug)]
16 pub enum Substitution<'a> {
17 /// A formatted output substitution with its internal byte offset.
19 /// A literal `%%` escape.
23 impl<'a> Substitution<'a> {
24 pub fn as_str(&self) -> &str {
26 Substitution::Format(ref fmt) => fmt.span,
27 Substitution::Escape => "%%",
31 pub fn position(&self) -> Option<(usize, usize)> {
33 Substitution::Format(ref fmt) => Some(fmt.position),
38 pub fn set_position(&mut self, start: usize, end: usize) {
40 Substitution::Format(ref mut fmt) => {
41 fmt.position = (start, end);
48 /// Translate this substitution into an equivalent Rust formatting directive.
50 /// Returns `None` when the substitution does not have an exact Rust equivalent, or when
51 /// no translation is necessary (as for the `%%` escape).
52 pub fn translate(&self) -> Option<String> {
54 Substitution::Format(ref fmt) => fmt.translate(),
55 Substitution::Escape => None,
60 #[derive(Clone, PartialEq, Debug)]
61 /// A single `printf`-style formatting directive.
62 pub struct Format<'a> {
63 /// The entire original formatting directive.
65 /// The (1-based) parameter to be converted.
66 pub parameter: Option<u16>,
69 /// Minimum width of the output.
70 pub width: Option<Num>,
71 /// Precision of the conversion.
72 pub precision: Option<Num>,
73 /// Length modifier for the conversion.
74 pub length: Option<&'a str>,
75 /// Type of parameter being converted.
77 /// Byte offset for the start and end of this formatting directive.
78 pub position: (usize, usize),
82 /// Translate this directive into an equivalent Rust formatting directive.
84 /// Returns `None` in cases where the `printf` directive does not have an exact Rust
85 /// equivalent, rather than guessing.
86 pub fn translate(&self) -> Option<String> {
89 let (c_alt, c_zero, c_left, c_plus) = {
90 let mut c_alt = false;
91 let mut c_zero = false;
92 let mut c_left = false;
93 let mut c_plus = false;
94 for c in self.flags.chars() {
103 (c_alt, c_zero, c_left, c_plus)
106 // The zero-fill flag has a special form in Rust for numbers.
107 let fill = if c_zero { Some("0") } else { None };
109 let align = if c_left { Some("<") } else { None };
111 // Rust doesn't have an equivalent to the `' '` flag.
112 let sign = if c_plus { Some("+") } else { None };
114 // Not *quite* the same, depending on the type...
117 let width = match self.width {
119 // NOTE: Rust doesn't support a width taken from the "next" argument (`*`).
122 w @ Some(Num::Arg(_)) => w,
123 w @ Some(Num::Num(_)) => w,
127 let precision = self.precision;
129 // NOTE: although length *can* have an effect, we can't duplicate the effect in Rust, so
130 // we just ignore it.
132 let (type_, use_zero_fill, is_int) = match self.type_ {
133 "d" | "i" | "u" => (None, true, true),
134 "f" | "F" => (None, false, false),
135 "s" | "c" => (None, false, false),
136 "e" | "E" => (Some(self.type_), true, false),
137 "x" | "X" | "o" => (Some(self.type_), true, true),
138 "p" => (Some(self.type_), false, true),
139 "g" => (Some("e"), true, false),
140 "G" => (Some("E"), true, false),
144 let (fill, width, precision) = match (is_int, width, precision) {
145 (true, Some(_), Some(_)) => {
146 // Rust can't duplicate this insanity.
149 (true, None, Some(p)) => (Some("0"), Some(p), None),
150 (true, w, None) => (fill, w, None),
151 (false, w, p) => (fill, w, p),
154 let align = match (self.type_, width.is_some(), align.is_some()) {
155 ("s", true, false) => Some(">"),
159 let (fill, zero_fill) = match (fill, use_zero_fill) {
160 (Some("0"), true) => (None, true),
161 (fill, _) => (fill, false),
164 let alt = match type_ {
165 Some("x") | Some("X") => alt,
169 let has_options = fill.is_some()
175 || precision.is_some()
179 // Initialise with a rough guess.
180 let cap = self.span.len() + if has_options { 2 } else { 0 };
181 let mut s = String::with_capacity(cap);
185 if let Some(arg) = self.parameter {
186 write!(s, "{}", arg.checked_sub(1)?).ok()?;
192 let align = if let Some(fill) = fill {
199 if let Some(align) = align {
203 if let Some(sign) = sign {
215 if let Some(width) = width {
216 width.translate(&mut s).ok()?;
219 if let Some(precision) = precision {
221 precision.translate(&mut s).ok()?;
224 if let Some(type_) = type_ {
234 /// A general number used in a `printf` formatting directive.
235 #[derive(Copy, Clone, PartialEq, Debug)]
237 // The range of these values is technically bounded by `NL_ARGMAX`... but, at least for GNU
238 // libc, it apparently has no real fixed limit. A `u16` is used here on the basis that it
239 // is *vanishingly* unlikely that *anyone* is going to try formatting something wider, or
240 // with more precision, than 65 thousand positions (the `u16` maximum), which is so wide it couldn't possibly fit
243 /// A specific, fixed value.
245 /// The value is derived from a positional argument.
247 /// The value is derived from the "next" unconverted argument.
252 fn from_str(s: &str, arg: Option<&str>) -> Self {
253 if let Some(arg) = arg {
254 Num::Arg(arg.parse().unwrap_or_else(|_| panic!("invalid format arg `{:?}`", arg)))
258 Num::Num(s.parse().unwrap_or_else(|_| panic!("invalid format num `{:?}`", s)))
262 fn translate(&self, s: &mut String) -> ::std::fmt::Result {
265 Num::Num(n) => write!(s, "{}", n),
267 let n = try!(n.checked_sub(1).ok_or(::std::fmt::Error));
270 Num::Next => write!(s, "*"),
275 /// Returns an iterator over all substitutions in a given string.
276 pub fn iter_subs(s: &str) -> Substitutions {
283 /// Iterator over substitutions in a string.
284 pub struct Substitutions<'a> {
289 impl<'a> Iterator for Substitutions<'a> {
290 type Item = Substitution<'a>;
291 fn next(&mut self) -> Option<Self::Item> {
292 let (mut sub, tail) = parse_next_substitution(self.s)?;
295 Substitution::Format(_) => if let Some((start, end)) = sub.position() {
296 sub.set_position(start + self.pos, end + self.pos);
299 Substitution::Escape => self.pos += 2,
304 fn size_hint(&self) -> (usize, Option<usize>) {
305 // Substitutions are at least 2 characters long.
306 (0, Some(self.s.len() / 2))
321 /// Parse the next substitution from the input string.
322 pub fn parse_next_substitution(s: &str) -> Option<(Substitution, &str)> {
326 let start = s.find('%')?;
327 match s[start+1..].chars().next()? {
328 '%' => return Some((Substitution::Escape, &s[start+2..])),
329 _ => {/* fall-through */},
332 Cur::new_at(&s[..], start)
335 // This is meant to be a translation of the following regex:
340 // (?: (?P<parameter> \d+) \$ )?
341 // (?P<flags> [-+ 0\#']* )
342 // (?P<width> \d+ | \* (?: (?P<widtha> \d+) \$ )? )?
343 // (?: \. (?P<precision> \d+ | \* (?: (?P<precisiona> \d+) \$ )? ) )?
346 // hh | h | ll | l | L | z | j | t
349 // | I32 | I64 | I | q
354 // Used to establish the full span at the end.
356 // The current position within the string.
357 let mut at = at.at_next_cp()?;
358 // `c` is the next codepoint, `next` is a cursor after it.
359 let (mut c, mut next) = at.next_cp()?;
361 // Update `at`, `c`, and `next`, exiting if we're out of input.
362 macro_rules! move_to {
366 let (c_, next_) = at.next_cp()?;
373 // Constructs a result when parsing fails.
375 // Note: `move` used to capture copies of the cursors as they are *now*.
376 let fallback = move || {
378 Substitution::Format(Format {
379 span: start.slice_between(next).unwrap(),
385 type_: at.slice_between(next).unwrap(),
386 position: (start.at, next.at),
392 // Next parsing state.
393 let mut state = Start;
395 // Sadly, Rust isn't *quite* smart enough to know these *must* be initialised by the end.
396 let mut parameter: Option<u16> = None;
397 let mut flags: &str = "";
398 let mut width: Option<Num> = None;
399 let mut precision: Option<Num> = None;
400 let mut length: Option<&str> = None;
401 let mut type_: &str = "";
404 if let Start = state {
407 let end = at_next_cp_while(next, is_digit);
408 match end.next_cp() {
409 // Yes, this *is* the parameter.
410 Some(('$', end2)) => {
412 parameter = Some(at.slice_between(end).unwrap().parse().unwrap());
415 // Wait, no, actually, it's the width.
420 width = Some(Num::from_str(at.slice_between(end).unwrap(), None));
423 // It's invalid, is what it is.
424 None => return fallback(),
435 if let Flags = state {
436 let end = at_next_cp_while(at, is_flag);
438 flags = at.slice_between(end).unwrap();
442 if let Width = state {
449 let end = at_next_cp_while(next, is_digit);
451 width = Some(Num::from_str(at.slice_between(end).unwrap(), None));
462 if let WidthArg = state {
463 let end = at_next_cp_while(at, is_digit);
464 match end.next_cp() {
465 Some(('$', end2)) => {
467 width = Some(Num::from_str("", Some(at.slice_between(end).unwrap())));
472 width = Some(Num::Next);
478 if let Prec = state {
492 if let PrecInner = state {
495 let end = at_next_cp_while(next, is_digit);
496 match end.next_cp() {
497 Some(('$', end2)) => {
499 precision = Some(Num::from_str("*", next.slice_between(end)));
504 precision = Some(Num::Next);
510 let end = at_next_cp_while(next, is_digit);
512 precision = Some(Num::from_str(at.slice_between(end).unwrap(), None));
515 _ => return fallback(),
519 if let Length = state {
520 let c1_next1 = next.next_cp();
521 match (c, c1_next1) {
522 ('h', Some(('h', next1)))
523 | ('l', Some(('l', next1)))
526 length = Some(at.slice_between(next1).unwrap());
530 ('h', _) | ('l', _) | ('L', _)
531 | ('z', _) | ('j', _) | ('t', _)
535 length = Some(at.slice_between(next).unwrap());
540 let end = next.at_next_cp()
541 .and_then(|end| end.at_next_cp())
542 .map(|end| (next.slice_between(end).unwrap(), end));
543 let end = match end {
544 Some(("32", end)) => end,
545 Some(("64", end)) => end,
549 length = Some(at.slice_between(end).unwrap());
561 if let Type = state {
563 type_ = at.slice_between(next).unwrap();
565 // Don't use `move_to!` here, as we *can* be at the end of the input.
573 let position = (start.at, end.at);
576 span: start.slice_between(end).unwrap(),
585 Some((Substitution::Format(f), end.slice_after()))
588 fn at_next_cp_while<F>(mut cur: Cur, mut pred: F) -> Cur
589 where F: FnMut(char) -> bool {
591 match cur.next_cp() {
592 Some((c, next)) => if pred(c) {
602 fn is_digit(c: char) -> bool {
609 fn is_flag(c: char) -> bool {
611 '0' | '-' | '+' | ' ' | '#' | '\'' => true,
623 parse_next_substitution as pns,
626 macro_rules! assert_eq_pnsat {
627 ($lhs:expr, $rhs:expr) => {
629 pns($lhs).and_then(|(s, _)| s.translate()),
630 $rhs.map(<String as From<&str>>::from)
637 assert_eq!(pns("has no escapes"), None);
638 assert_eq!(pns("has no escapes, either %"), None);
639 assert_eq!(pns("*so* has a %% escape"), Some((S::Escape," escape")));
640 assert_eq!(pns("%% leading escape"), Some((S::Escape, " leading escape")));
641 assert_eq!(pns("trailing escape %%"), Some((S::Escape, "")));
646 macro_rules! assert_pns_eq_sub {
648 $param:expr, $flags:expr,
649 $width:expr, $prec:expr, $len:expr, $type_:expr,
653 pns(concat!($in_, "!")),
671 assert_pns_eq_sub!("%!",
672 { None, "", None, None, None, "!", (0, 2), });
673 assert_pns_eq_sub!("%c",
674 { None, "", None, None, None, "c", (0, 2), });
675 assert_pns_eq_sub!("%s",
676 { None, "", None, None, None, "s", (0, 2), });
677 assert_pns_eq_sub!("%06d",
678 { None, "0", Some(N::Num(6)), None, None, "d", (0, 4), });
679 assert_pns_eq_sub!("%4.2f",
680 { None, "", Some(N::Num(4)), Some(N::Num(2)), None, "f", (0, 5), });
681 assert_pns_eq_sub!("%#x",
682 { None, "#", None, None, None, "x", (0, 3), });
683 assert_pns_eq_sub!("%-10s",
684 { None, "-", Some(N::Num(10)), None, None, "s", (0, 5), });
685 assert_pns_eq_sub!("%*s",
686 { None, "", Some(N::Next), None, None, "s", (0, 3), });
687 assert_pns_eq_sub!("%-10.*s",
688 { None, "-", Some(N::Num(10)), Some(N::Next), None, "s", (0, 7), });
689 assert_pns_eq_sub!("%-*.*s",
690 { None, "-", Some(N::Next), Some(N::Next), None, "s", (0, 6), });
691 assert_pns_eq_sub!("%.6i",
692 { None, "", None, Some(N::Num(6)), None, "i", (0, 4), });
693 assert_pns_eq_sub!("%+i",
694 { None, "+", None, None, None, "i", (0, 3), });
695 assert_pns_eq_sub!("%08X",
696 { None, "0", Some(N::Num(8)), None, None, "X", (0, 4), });
697 assert_pns_eq_sub!("%lu",
698 { None, "", None, None, Some("l"), "u", (0, 3), });
699 assert_pns_eq_sub!("%Iu",
700 { None, "", None, None, Some("I"), "u", (0, 3), });
701 assert_pns_eq_sub!("%I32u",
702 { None, "", None, None, Some("I32"), "u", (0, 5), });
703 assert_pns_eq_sub!("%I64u",
704 { None, "", None, None, Some("I64"), "u", (0, 5), });
705 assert_pns_eq_sub!("%'d",
706 { None, "'", None, None, None, "d", (0, 3), });
707 assert_pns_eq_sub!("%10s",
708 { None, "", Some(N::Num(10)), None, None, "s", (0, 4), });
709 assert_pns_eq_sub!("%-10.10s",
710 { None, "-", Some(N::Num(10)), Some(N::Num(10)), None, "s", (0, 8), });
711 assert_pns_eq_sub!("%1$d",
712 { Some(1), "", None, None, None, "d", (0, 4), });
713 assert_pns_eq_sub!("%2$.*3$d",
714 { Some(2), "", None, Some(N::Arg(3)), None, "d", (0, 8), });
715 assert_pns_eq_sub!("%1$*2$.*3$d",
716 { Some(1), "", Some(N::Arg(2)), Some(N::Arg(3)), None, "d", (0, 11), });
717 assert_pns_eq_sub!("%-8ld",
718 { None, "-", Some(N::Num(8)), None, Some("l"), "d", (0, 5), });
723 let s = "The %d'th word %% is: `%.*s` %!\n";
724 let subs: Vec<_> = iter_subs(s).map(|sub| sub.translate()).collect();
726 subs.iter().map(|ms| ms.as_ref().map(|s| &s[..])).collect::<Vec<_>>(),
727 vec![Some("{}"), None, Some("{:.*}"), None]
731 /// Check that the translations are what we expect.
733 fn test_translation() {
734 assert_eq_pnsat!("%c", Some("{}"));
735 assert_eq_pnsat!("%d", Some("{}"));
736 assert_eq_pnsat!("%u", Some("{}"));
737 assert_eq_pnsat!("%x", Some("{:x}"));
738 assert_eq_pnsat!("%X", Some("{:X}"));
739 assert_eq_pnsat!("%e", Some("{:e}"));
740 assert_eq_pnsat!("%E", Some("{:E}"));
741 assert_eq_pnsat!("%f", Some("{}"));
742 assert_eq_pnsat!("%g", Some("{:e}"));
743 assert_eq_pnsat!("%G", Some("{:E}"));
744 assert_eq_pnsat!("%s", Some("{}"));
745 assert_eq_pnsat!("%p", Some("{:p}"));
747 assert_eq_pnsat!("%06d", Some("{:06}"));
748 assert_eq_pnsat!("%4.2f", Some("{:4.2}"));
749 assert_eq_pnsat!("%#x", Some("{:#x}"));
750 assert_eq_pnsat!("%-10s", Some("{:<10}"));
751 assert_eq_pnsat!("%*s", None);
752 assert_eq_pnsat!("%-10.*s", Some("{:<10.*}"));
753 assert_eq_pnsat!("%-*.*s", None);
754 assert_eq_pnsat!("%.6i", Some("{:06}"));
755 assert_eq_pnsat!("%+i", Some("{:+}"));
756 assert_eq_pnsat!("%08X", Some("{:08X}"));
757 assert_eq_pnsat!("%lu", Some("{}"));
758 assert_eq_pnsat!("%Iu", Some("{}"));
759 assert_eq_pnsat!("%I32u", Some("{}"));
760 assert_eq_pnsat!("%I64u", Some("{}"));
761 assert_eq_pnsat!("%'d", None);
762 assert_eq_pnsat!("%10s", Some("{:>10}"));
763 assert_eq_pnsat!("%-10.10s", Some("{:<10.10}"));
764 assert_eq_pnsat!("%1$d", Some("{0}"));
765 assert_eq_pnsat!("%2$.*3$d", Some("{1:02$}"));
766 assert_eq_pnsat!("%1$*2$.*3$s", Some("{0:>1$.2$}"));
767 assert_eq_pnsat!("%-8ld", Some("{:<8}"));
773 use super::strcursor::StrCursor as Cur;
775 #[derive(Clone, PartialEq, Debug)]
776 pub enum Substitution<'a> {
782 impl<'a> Substitution<'a> {
783 pub fn as_str(&self) -> String {
785 Substitution::Ordinal(n) => format!("${}", n),
786 Substitution::Name(n) => format!("${}", n),
787 Substitution::Escape => "$$".into(),
791 pub fn position(&self) -> Option<(usize, usize)> {
797 pub fn translate(&self) -> Option<String> {
799 Substitution::Ordinal(n) => Some(format!("{{{}}}", n)),
800 Substitution::Name(n) => Some(format!("{{{}}}", n)),
801 Substitution::Escape => None,
806 /// Returns an iterator over all substitutions in a given string.
807 pub fn iter_subs(s: &str) -> Substitutions {
813 /// Iterator over substitutions in a string.
814 pub struct Substitutions<'a> {
818 impl<'a> Iterator for Substitutions<'a> {
819 type Item = Substitution<'a>;
820 fn next(&mut self) -> Option<Self::Item> {
821 match parse_next_substitution(self.s) {
822 Some((sub, tail)) => {
830 fn size_hint(&self) -> (usize, Option<usize>) {
831 (0, Some(self.s.len()))
835 /// Parse the next substitution from the input string.
836 pub fn parse_next_substitution(s: &str) -> Option<(Substitution, &str)> {
838 let start = s.find('$')?;
839 match s[start+1..].chars().next()? {
840 '$' => return Some((Substitution::Escape, &s[start+2..])),
842 let n = (c as u8) - b'0';
843 return Some((Substitution::Ordinal(n), &s[start+2..]));
845 _ => {/* fall-through */},
848 Cur::new_at_start(&s[start..])
851 let at = at.at_next_cp()?;
852 let (c, inner) = at.next_cp()?;
854 if !is_ident_head(c) {
857 let end = at_next_cp_while(inner, is_ident_tail);
858 Some((Substitution::Name(at.slice_between(end).unwrap()), end.slice_after()))
862 fn at_next_cp_while<F>(mut cur: Cur, mut pred: F) -> Cur
863 where F: FnMut(char) -> bool {
865 match cur.next_cp() {
866 Some((c, next)) => if pred(c) {
876 fn is_ident_head(c: char) -> bool {
878 'a' ..= 'z' | 'A' ..= 'Z' | '_' => true,
883 fn is_ident_tail(c: char) -> bool {
886 c => is_ident_head(c)
894 parse_next_substitution as pns,
897 macro_rules! assert_eq_pnsat {
898 ($lhs:expr, $rhs:expr) => {
900 pns($lhs).and_then(|(f, _)| f.translate()),
901 $rhs.map(<String as From<&str>>::from)
908 assert_eq!(pns("has no escapes"), None);
909 assert_eq!(pns("has no escapes, either $"), None);
910 assert_eq!(pns("*so* has a $$ escape"), Some((S::Escape, " escape")));
911 assert_eq!(pns("$$ leading escape"), Some((S::Escape, " leading escape")));
912 assert_eq!(pns("trailing escape $$"), Some((S::Escape, "")));
917 macro_rules! assert_pns_eq_sub {
918 ($in_:expr, $kind:ident($arg:expr)) => {
919 assert_eq!(pns(concat!($in_, "!")), Some((S::$kind($arg.into()), "!")))
923 assert_pns_eq_sub!("$0", Ordinal(0));
924 assert_pns_eq_sub!("$1", Ordinal(1));
925 assert_pns_eq_sub!("$9", Ordinal(9));
926 assert_pns_eq_sub!("$N", Name("N"));
927 assert_pns_eq_sub!("$NAME", Name("NAME"));
932 use super::iter_subs;
933 let s = "The $0'th word $$ is: `$WORD` $!\n";
934 let subs: Vec<_> = iter_subs(s).map(|sub| sub.translate()).collect();
936 subs.iter().map(|ms| ms.as_ref().map(|s| &s[..])).collect::<Vec<_>>(),
937 vec![Some("{0}"), None, Some("{WORD}")]
942 fn test_translation() {
943 assert_eq_pnsat!("$0", Some("{0}"));
944 assert_eq_pnsat!("$9", Some("{9}"));
945 assert_eq_pnsat!("$1", Some("{1}"));
946 assert_eq_pnsat!("$10", Some("{1}"));
947 assert_eq_pnsat!("$stuff", Some("{stuff}"));
948 assert_eq_pnsat!("$NAME", Some("{NAME}"));
949 assert_eq_pnsat!("$PREFIX/bin", Some("{PREFIX}"));
958 pub struct StrCursor<'a> {
963 impl<'a> StrCursor<'a> {
964 pub fn new_at_start(s: &'a str) -> StrCursor<'a> {
971 pub fn new_at(s: &'a str, at: usize) -> StrCursor<'a> {
978 pub fn at_next_cp(mut self) -> Option<StrCursor<'a>> {
979 match self.try_seek_right_cp() {
985 pub fn next_cp(mut self) -> Option<(char, StrCursor<'a>)> {
986 let cp = self.cp_after()?;
987 self.seek_right(cp.len_utf8());
991 fn slice_before(&self) -> &'a str {
995 pub fn slice_after(&self) -> &'a str {
999 pub fn slice_between(&self, until: StrCursor<'a>) -> Option<&'a str> {
1000 if !str_eq_literal(self.s, until.s) {
1003 use std::cmp::{max, min};
1004 let beg = min(self.at, until.at);
1005 let end = max(self.at, until.at);
1006 Some(&self.s[beg..end])
1010 fn cp_after(&self) -> Option<char> {
1011 self.slice_after().chars().next()
1014 fn try_seek_right_cp(&mut self) -> bool {
1015 match self.slice_after().chars().next() {
1017 self.at += c.len_utf8();
1024 fn seek_right(&mut self, bytes: usize) {
1029 impl<'a> Copy for StrCursor<'a> {}
1031 impl<'a> Clone for StrCursor<'a> {
1032 fn clone(&self) -> StrCursor<'a> {
1037 impl<'a> std::fmt::Debug for StrCursor<'a> {
1038 fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
1039 write!(fmt, "StrCursor({:?} | {:?})", self.slice_before(), self.slice_after())
1043 fn str_eq_literal(a: &str, b: &str) -> bool {
1044 a.as_bytes().as_ptr() == b.as_bytes().as_ptr()
1045 && a.len() == b.len()