2 use super::strcursor::StrCursor as Cur;
3 use rustc_span::InnerSpan;
5 /// Represents a single `printf`-style substitution.
6 #[derive(Clone, PartialEq, Debug)]
7 pub enum Substitution<'a> {
8 /// A formatted output substitution with its internal byte offset.
10 /// A literal `%%` escape.
14 impl<'a> Substitution<'a> {
15 pub fn as_str(&self) -> &str {
17 Substitution::Format(ref fmt) => fmt.span,
18 Substitution::Escape => "%%",
22 pub fn position(&self) -> Option<InnerSpan> {
24 Substitution::Format(ref fmt) => Some(fmt.position),
29 pub fn set_position(&mut self, start: usize, end: usize) {
31 Substitution::Format(ref mut fmt) => {
32 fmt.position = InnerSpan::new(start, end);
38 /// Translate this substitution into an equivalent Rust formatting directive.
40 /// This ignores cases where the substitution does not have an exact equivalent, or where
41 /// the substitution would be unnecessary.
42 pub fn translate(&self) -> Option<String> {
44 Substitution::Format(ref fmt) => fmt.translate(),
45 Substitution::Escape => None,
50 #[derive(Clone, PartialEq, Debug)]
51 /// A single `printf`-style formatting directive.
52 pub struct Format<'a> {
53 /// The entire original formatting directive.
55 /// The (1-based) parameter to be converted.
56 pub parameter: Option<u16>,
59 /// Minimum width of the output.
60 pub width: Option<Num>,
61 /// Precision of the conversion.
62 pub precision: Option<Num>,
63 /// Length modifier for the conversion.
64 pub length: Option<&'a str>,
65 /// Type of parameter being converted.
67 /// Byte offset for the start and end of this formatting directive.
68 pub position: InnerSpan,
72 /// Translate this directive into an equivalent Rust formatting directive.
74 /// Returns `None` in cases where the `printf` directive does not have an exact Rust
75 /// equivalent, rather than guessing.
76 pub fn translate(&self) -> Option<String> {
79 let (c_alt, c_zero, c_left, c_plus) = {
80 let mut c_alt = false;
81 let mut c_zero = false;
82 let mut c_left = false;
83 let mut c_plus = false;
84 for c in self.flags.chars() {
93 (c_alt, c_zero, c_left, c_plus)
96 // Has a special form in Rust for numbers.
97 let fill = c_zero.then_some("0");
99 let align = c_left.then_some("<");
101 // Rust doesn't have an equivalent to the `' '` flag.
102 let sign = c_plus.then_some("+");
104 // Not *quite* the same, depending on the type...
107 let width = match self.width {
109 // NOTE: Rust doesn't support this.
112 w @ Some(Num::Arg(_)) => w,
113 w @ Some(Num::Num(_)) => w,
117 let precision = self.precision;
119 // NOTE: although length *can* have an effect, we can't duplicate the effect in Rust, so
120 // we just ignore it.
122 let (type_, use_zero_fill, is_int) = match self.type_ {
123 "d" | "i" | "u" => (None, true, true),
124 "f" | "F" => (None, false, false),
125 "s" | "c" => (None, false, false),
126 "e" | "E" => (Some(self.type_), true, false),
127 "x" | "X" | "o" => (Some(self.type_), true, true),
128 "p" => (Some(self.type_), false, true),
129 "g" => (Some("e"), true, false),
130 "G" => (Some("E"), true, false),
134 let (fill, width, precision) = match (is_int, width, precision) {
135 (true, Some(_), Some(_)) => {
136 // Rust can't duplicate this insanity.
139 (true, None, Some(p)) => (Some("0"), Some(p), None),
140 (true, w, None) => (fill, w, None),
141 (false, w, p) => (fill, w, p),
144 let align = match (self.type_, width.is_some(), align.is_some()) {
145 ("s", true, false) => Some(">"),
149 let (fill, zero_fill) = match (fill, use_zero_fill) {
150 (Some("0"), true) => (None, true),
151 (fill, _) => (fill, false),
154 let alt = match type_ {
155 Some("x") | Some("X") => alt,
159 let has_options = fill.is_some()
165 || precision.is_some()
168 // Initialise with a rough guess.
169 let cap = self.span.len() + if has_options { 2 } else { 0 };
170 let mut s = String::with_capacity(cap);
174 if let Some(arg) = self.parameter {
175 write!(s, "{}", arg.checked_sub(1)?).ok()?;
181 let align = if let Some(fill) = fill {
188 if let Some(align) = align {
192 if let Some(sign) = sign {
204 if let Some(width) = width {
205 width.translate(&mut s).ok()?;
208 if let Some(precision) = precision {
210 precision.translate(&mut s).ok()?;
213 if let Some(type_) = type_ {
/// A general number used in a `printf` formatting directive.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum Num {
    // The range of these values is technically bounded by `NL_ARGMAX`... but, at least for GNU
    // libc, it apparently has no real fixed limit. A `u16` is used here on the basis that it
    // is *vanishingly* unlikely that *anyone* is going to try formatting something wider, or
    // with more precision, than 65 thousand positions, which is so wide it couldn't possibly
    // fit on a screen.
    /// A specific, fixed value.
    Num(u16),
    /// The value is derived from a positional argument.
    Arg(u16),
    /// The value is derived from the "next" unconverted argument.
    Next,
}

impl Num {
    /// Builds a `Num` from the text of a width/precision field.
    ///
    /// * If `arg` is `Some`, it is the digits of a positional reference (`N` in `*N$`) and
    ///   the result is `Num::Arg`; `s` is ignored.
    /// * Otherwise a bare `"*"` yields `Num::Next`, and anything else is parsed as a literal.
    ///
    /// # Panics
    ///
    /// Panics if the digits do not parse as a `u16` (for example on overflow).
    // NOTE(review): the `"*"` branch was restored from the `Num::Next` variant and the
    // `from_str("*", ..)` call site; confirm against the upstream file.
    fn from_str(s: &str, arg: Option<&str>) -> Self {
        if let Some(arg) = arg {
            Num::Arg(arg.parse().unwrap_or_else(|_| panic!("invalid format arg `{:?}`", arg)))
        } else if s == "*" {
            Num::Next
        } else {
            Num::Num(s.parse().unwrap_or_else(|_| panic!("invalid format num `{:?}`", s)))
        }
    }

    /// Appends the Rust spelling of this number to `s`.
    ///
    /// A literal is written as-is, and a positional argument is converted from `printf`'s
    /// 1-based index to Rust's 0-based `N$` form. An index of `0` has no 0-based
    /// counterpart, so it is reported as `std::fmt::Error`.
    fn translate(&self, s: &mut String) -> std::fmt::Result {
        use std::fmt::Write;

        match *self {
            Num::Num(n) => write!(s, "{}", n),
            Num::Arg(n) => {
                let n = n.checked_sub(1).ok_or(std::fmt::Error)?;
                write!(s, "{}$", n)
            }
            Num::Next => write!(s, "*"),
        }
    }
}
263 /// Returns an iterator over all substitutions in a given string.
264 pub fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
265 Substitutions { s, pos: start_pos }
/// Iterator over substitutions in a string.
pub struct Substitutions<'a> {
    /// The part of the input that has not been scanned yet.
    s: &'a str,
    /// Offset to add to the positions reported for `s`.
    pos: usize,
}
274 impl<'a> Iterator for Substitutions<'a> {
275 type Item = Substitution<'a>;
276 fn next(&mut self) -> Option<Self::Item> {
277 let (mut sub, tail) = parse_next_substitution(self.s)?;
280 Substitution::Format(_) => {
281 if let Some(inner_span) = sub.position() {
282 sub.set_position(inner_span.start + self.pos, inner_span.end + self.pos);
283 self.pos += inner_span.end;
286 Substitution::Escape => self.pos += 2,
291 fn size_hint(&self) -> (usize, Option<usize>) {
292 // Substitutions are at least 2 characters long.
293 (0, Some(self.s.len() / 2))
308 /// Parse the next substitution from the input string.
309 pub fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
313 let start = s.find('%')?;
314 match s[start + 1..].chars().next()? {
315 '%' => return Some((Substitution::Escape, &s[start + 2..])),
316 _ => { /* fall-through */ }
319 Cur::new_at(&s[..], start)
322 // This is meant to be a translation of the following regex:
327 // (?: (?P<parameter> \d+) \$ )?
328 // (?P<flags> [-+ 0\#']* )
329 // (?P<width> \d+ | \* (?: (?P<widtha> \d+) \$ )? )?
330 // (?: \. (?P<precision> \d+ | \* (?: (?P<precisiona> \d+) \$ )? ) )?
333 // hh | h | ll | l | L | z | j | t
336 // | I32 | I64 | I | q
341 // Used to establish the full span at the end.
343 // The current position within the string.
344 let mut at = at.at_next_cp()?;
345 // `c` is the next codepoint, `next` is a cursor after it.
346 let (mut c, mut next) = at.next_cp()?;
348 // Update `at`, `c`, and `next`, exiting if we're out of input.
349 macro_rules! move_to {
352 let (c_, next_) = at.next_cp()?;
358 // Constructs a result when parsing fails.
360 // Note: `move` used to capture copies of the cursors as they are *now*.
361 let fallback = move || {
363 Substitution::Format(Format {
364 span: start.slice_between(next).unwrap(),
370 type_: at.slice_between(next).unwrap(),
371 position: InnerSpan::new(start.at, next.at),
377 // Next parsing state.
378 let mut state = Start;
380 // Sadly, Rust isn't *quite* smart enough to know these *must* be initialised by the end.
381 let mut parameter: Option<u16> = None;
382 let mut flags: &str = "";
383 let mut width: Option<Num> = None;
384 let mut precision: Option<Num> = None;
385 let mut length: Option<&str> = None;
386 let mut type_: &str = "";
389 if let Start = state {
392 let end = at_next_cp_while(next, is_digit);
393 match end.next_cp() {
394 // Yes, this *is* the parameter.
395 Some(('$', end2)) => {
397 parameter = Some(at.slice_between(end).unwrap().parse().unwrap());
400 // Wait, no, actually, it's the width.
405 width = Some(Num::from_str(at.slice_between(end).unwrap(), None));
408 // It's invalid, is what it is.
409 None => return fallback(),
420 if let Flags = state {
421 let end = at_next_cp_while(at, is_flag);
423 flags = at.slice_between(end).unwrap();
427 if let Width = state {
434 let end = at_next_cp_while(next, is_digit);
436 width = Some(Num::from_str(at.slice_between(end).unwrap(), None));
447 if let WidthArg = state {
448 let end = at_next_cp_while(at, is_digit);
449 match end.next_cp() {
450 Some(('$', end2)) => {
452 width = Some(Num::from_str("", Some(at.slice_between(end).unwrap())));
457 width = Some(Num::Next);
463 if let Prec = state {
477 if let PrecInner = state {
480 let end = at_next_cp_while(next, is_digit);
481 match end.next_cp() {
482 Some(('$', end2)) => {
484 precision = Some(Num::from_str("*", next.slice_between(end)));
489 precision = Some(Num::Next);
495 let end = at_next_cp_while(next, is_digit);
497 precision = Some(Num::from_str(at.slice_between(end).unwrap(), None));
500 _ => return fallback(),
504 if let Length = state {
505 let c1_next1 = next.next_cp();
506 match (c, c1_next1) {
507 ('h', Some(('h', next1))) | ('l', Some(('l', next1))) => {
509 length = Some(at.slice_between(next1).unwrap());
513 ('h', _) | ('l', _) | ('L', _) | ('z', _) | ('j', _) | ('t', _) | ('q', _) => {
515 length = Some(at.slice_between(next).unwrap());
522 .and_then(|end| end.at_next_cp())
523 .map(|end| (next.slice_between(end).unwrap(), end));
524 let end = match end {
525 Some(("32", end)) => end,
526 Some(("64", end)) => end,
530 length = Some(at.slice_between(end).unwrap());
542 if let Type = state {
544 type_ = at.slice_between(next).unwrap();
546 // Don't use `move_to!` here, as we *can* be at the end of the input.
554 let position = InnerSpan::new(start.at, end.at);
557 span: start.slice_between(end).unwrap(),
566 Some((Substitution::Format(f), end.slice_after()))
569 fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
571 F: FnMut(char) -> bool,
574 match cur.next_cp() {
/// Returns `true` if `c` is an ASCII decimal digit (`0`-`9`).
// NOTE(review): the body was restored from the callers, which `parse()` the matched run into
// a `u16` and would panic on non-ASCII digits; confirm against the upstream file.
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
/// Returns `true` if `c` is one of the `printf` flag characters: `0`, `-`, `+`, space, `#`
/// or `'`.
fn is_flag(c: char) -> bool {
    matches!(c, '0' | '-' | '+' | ' ' | '#' | '\'')
}
606 use super::strcursor::StrCursor as Cur;
607 use rustc_span::InnerSpan;
/// A single shell-style (`$`-prefixed) substitution.
///
/// Every variant carries its `(start, end)` byte span as the last field.
#[derive(Clone, PartialEq, Debug)]
pub enum Substitution<'a> {
    /// A single-digit positional reference such as `$1`.
    Ordinal(u8, (usize, usize)),
    /// A named reference such as `$name`; the string excludes the `$`.
    Name(&'a str, (usize, usize)),
    /// A literal `$$` escape.
    Escape((usize, usize)),
}
616 impl Substitution<'_> {
617 pub fn as_str(&self) -> String {
619 Substitution::Ordinal(n, _) => format!("${}", n),
620 Substitution::Name(n, _) => format!("${}", n),
621 Substitution::Escape(_) => "$$".into(),
625 pub fn position(&self) -> Option<InnerSpan> {
627 Substitution::Ordinal(_, pos)
628 | Substitution::Name(_, pos)
629 | Substitution::Escape(pos) => Some(InnerSpan::new(pos.0, pos.1)),
633 pub fn set_position(&mut self, start: usize, end: usize) {
635 Substitution::Ordinal(_, ref mut pos)
636 | Substitution::Name(_, ref mut pos)
637 | Substitution::Escape(ref mut pos) => *pos = (start, end),
641 pub fn translate(&self) -> Option<String> {
643 Substitution::Ordinal(n, _) => Some(format!("{{{}}}", n)),
644 Substitution::Name(n, _) => Some(format!("{{{}}}", n)),
645 Substitution::Escape(_) => None,
650 /// Returns an iterator over all substitutions in a given string.
651 pub fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
652 Substitutions { s, pos: start_pos }
/// Iterator over substitutions in a string.
pub struct Substitutions<'a> {
    /// The part of the input that has not been scanned yet.
    s: &'a str,
    /// Offset to add to the positions reported for `s`.
    pos: usize,
}
661 impl<'a> Iterator for Substitutions<'a> {
662 type Item = Substitution<'a>;
663 fn next(&mut self) -> Option<Self::Item> {
664 match parse_next_substitution(self.s) {
665 Some((mut sub, tail)) => {
667 if let Some(InnerSpan { start, end }) = sub.position() {
668 sub.set_position(start + self.pos, end + self.pos);
677 fn size_hint(&self) -> (usize, Option<usize>) {
678 (0, Some(self.s.len()))
682 /// Parse the next substitution from the input string.
683 pub fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
685 let start = s.find('$')?;
686 match s[start + 1..].chars().next()? {
687 '$' => return Some((Substitution::Escape((start, start + 2)), &s[start + 2..])),
689 let n = (c as u8) - b'0';
690 return Some((Substitution::Ordinal(n, (start, start + 2)), &s[start + 2..]));
692 _ => { /* fall-through */ }
695 Cur::new_at(&s[..], start)
698 let at = at.at_next_cp()?;
699 let (c, inner) = at.next_cp()?;
701 if !is_ident_head(c) {
704 let end = at_next_cp_while(inner, is_ident_tail);
705 let slice = at.slice_between(end).unwrap();
706 let start = at.at - 1;
707 let end_pos = at.at + slice.len();
708 Some((Substitution::Name(slice, (start, end_pos)), end.slice_after()))
712 fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
714 F: FnMut(char) -> bool,
717 match cur.next_cp() {
/// Returns `true` if `c` may start an identifier: an ASCII letter or `_`.
fn is_ident_head(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
737 fn is_ident_tail(c: char) -> bool {
740 c => is_ident_head(c),
/// A position inside a string slice, always kept on a code-point boundary by the methods
/// below.
pub struct StrCursor<'a> {
    /// The string being walked.
    s: &'a str,
    /// Byte offset of the cursor within `s`.
    pub at: usize,
}

impl<'a> StrCursor<'a> {
    /// Creates a cursor over `s` at byte offset `at`.
    ///
    /// `at` is not validated here; slicing through the cursor panics if it is not on a
    /// character boundary of `s`.
    pub fn new_at(s: &'a str, at: usize) -> StrCursor<'a> {
        StrCursor { s, at }
    }

    /// Returns the cursor moved one code point to the right, or `None` at the end of input.
    pub fn at_next_cp(mut self) -> Option<StrCursor<'a>> {
        match self.try_seek_right_cp() {
            true => Some(self),
            false => None,
        }
    }

    /// Returns the code point under the cursor together with a cursor placed just after it,
    /// or `None` at the end of input.
    pub fn next_cp(mut self) -> Option<(char, StrCursor<'a>)> {
        let cp = self.cp_after()?;
        self.seek_right(cp.len_utf8());
        Some((cp, self))
    }

    /// The text to the left of the cursor.
    fn slice_before(&self) -> &'a str {
        &self.s[..self.at]
    }

    /// The text from the cursor to the end of the string.
    pub fn slice_after(&self) -> &'a str {
        &self.s[self.at..]
    }

    /// Returns the text between this cursor and `until`, in either order.
    ///
    /// Yields `None` when the two cursors do not walk the very same string slice.
    pub fn slice_between(&self, until: StrCursor<'a>) -> Option<&'a str> {
        if !str_eq_literal(self.s, until.s) {
            return None;
        }

        use std::cmp::{max, min};
        let beg = min(self.at, until.at);
        let end = max(self.at, until.at);
        Some(&self.s[beg..end])
    }

    /// The code point immediately after the cursor, if any.
    fn cp_after(&self) -> Option<char> {
        self.slice_after().chars().next()
    }

    /// Moves the cursor past the next code point; returns `false` (without moving) at the
    /// end of input.
    fn try_seek_right_cp(&mut self) -> bool {
        match self.slice_after().chars().next() {
            Some(c) => {
                self.at += c.len_utf8();
                true
            }
            None => false,
        }
    }

    /// Moves the cursor `bytes` bytes to the right without any boundary check.
    fn seek_right(&mut self, bytes: usize) {
        self.at += bytes;
    }
}

impl Copy for StrCursor<'_> {}

impl<'a> Clone for StrCursor<'a> {
    fn clone(&self) -> StrCursor<'a> {
        *self
    }
}

impl std::fmt::Debug for StrCursor<'_> {
    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(fmt, "StrCursor({:?} | {:?})", self.slice_before(), self.slice_after())
    }
}

/// Returns `true` only if `a` and `b` are the same slice: same start address and same
/// length. Equal contents stored at different addresses do not count.
fn str_eq_literal(a: &str, b: &str) -> bool {
    a.as_bytes().as_ptr() == b.as_bytes().as_ptr() && a.len() == b.len()
}