1 // Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Parsing of format strings
13 //! These structures are used when parsing format strings for the compiler.
14 //! Parsing does not currently happen at runtime (structures of std::fmt::rt are
15 //! generated instead).
22 /// A piece is a portion of the format string which represents the next part to
23 /// emit. These are emitted as a stream by the `Parser` struct.
26 /// A literal string which should directly be emitted
28 /// A back-reference to whatever the current argument is. This is used
29 /// inside of a method call to refer back to the original argument.
31 /// This describes that formatting should process the next argument (as
32 /// specified inside) for emission.
33 Argument(Argument<'a>),
36 /// Representation of an argument specification.
38 pub struct Argument<'a> {
39 /// Where to find this argument
40 position: Position<'a>,
41 /// How to format the argument
42 format: FormatSpec<'a>,
43 /// If not `None`, what method to invoke on the argument
44 method: Option<~Method<'a>>
47 /// Specification for the formatting of an argument in the format string.
49 pub struct FormatSpec<'a> {
50 /// Optionally specified character to fill alignment with
52 /// Optionally specified alignment
54 /// Packed version of various flags provided
56 /// The integer precision to use
58 /// The string width requested for the resulting format
60 /// The descriptor string representing the name of the format desired for
61 /// this argument. This can be empty or any number of characters, although
62 /// it is required to be one word.
66 /// Enum describing where an argument for a format can be located.
69 pub enum Position<'a> {
70 ArgumentNext, ArgumentIs(uint), ArgumentNamed(&'a str)
73 /// Enum of alignments which are supported.
///
/// Within this file, the `format` method of `Parser` assigns `AlignLeft`
/// after consuming a `<` and `AlignRight` after consuming a `>`.
// NOTE(review): `AlignUnknown` is presumably the value used when no alignment
// character is present -- confirm against the `FormatSpec` initializer.
76 pub enum Alignment { AlignLeft, AlignRight, AlignUnknown }
78 /// Various flags which can be applied to format strings, the meaning of these
79 /// flags is defined by the formatters themselves.
89 /// A count is used for the precision and width parameters of an integer, and
90 /// can reference either an argument or a literal integer.
101 /// Enum describing all of the possible methods which the formatting language
102 /// currently supports.
104 pub enum Method<'a> {
105 /// A plural method selects on an integer over a list of either integer or
106 /// keyword-defined clauses. The meaning of the keywords is defined by the
109 /// An offset is optionally present at the beginning which is used to match
110 /// against keywords, but it is not matched against the literal integers.
112 /// The final element of this enum is the default "other" case which is
113 /// always required to be specified.
114 Plural(Option<uint>, ~[PluralArm<'a>], ~[Piece<'a>]),
116 /// A select method selects over a string. Each arm is a different string
117 /// which can be selected for.
119 /// As with `Plural`, a default "other" case is required as well.
120 Select(~[SelectArm<'a>], ~[Piece<'a>]),
123 /// A selector for what pluralization a plural method should take
124 #[deriving(Eq, Hash)]
125 pub enum PluralSelector {
126 /// One of the plural keywords should be used
127 Keyword(PluralKeyword),
128 /// A literal pluralization should be used
132 /// Structure representing one "arm" of the `plural` function.
134 pub struct PluralArm<'a> {
135 /// A selector can either be specified by a keyword or with an integer
137 selector: PluralSelector,
138 /// Array of pieces which are the format of this arm
139 result: ~[Piece<'a>],
142 /// Enum of the 5 CLDR plural keywords. There is one more, "other", but that is
143 /// specially placed in the `Plural` variant of `Method`
145 /// http://www.icu-project.org/apiref/icu4c/classicu_1_1PluralRules.html
146 #[deriving(Eq, Hash)]
147 #[allow(missing_doc)]
148 pub enum PluralKeyword {
149 Zero, One, Two, Few, Many
152 /// Structure representing one "arm" of the `select` function.
154 pub struct SelectArm<'a> {
155 /// String selector which guards this arm
157 /// Array of pieces which are the format of this arm
158 result: ~[Piece<'a>],
161 /// The parser structure for interpreting the input format string. This is
162 /// modelled as an iterator over `Piece` structures to form a stream of tokens
165 /// This is a recursive-descent parser for the sake of simplicity, and if
166 /// necessary there's probably lots of room for improvement performance-wise.
167 pub struct Parser<'a> {
169 priv cur: str::CharOffsets<'a>,
171 /// Error messages accumulated during parsing
175 impl<'a> Iterator<Piece<'a>> for Parser<'a> {
176 fn next(&mut self) -> Option<Piece<'a>> {
177 match self.cur.clone().next() {
178 Some((_, '#')) => { self.cur.next(); Some(CurrentArgument) }
181 let ret = Some(Argument(self.argument()));
182 self.must_consume('}');
185 Some((pos, '\\')) => {
187 self.escape(); // ensure it's a valid escape sequence
188 Some(String(self.string(pos + 1))) // skip the '\' character
190 Some((_, '}')) if self.depth == 0 => {
192 self.err("unmatched `}` found");
195 Some((_, '}')) | None => { None }
197 Some(String(self.string(pos)))
203 impl<'a> Parser<'a> {
204 /// Creates a new parser for the given format string
205 pub fn new<'a>(s: &'a str) -> Parser<'a> {
208 cur: s.char_indices(),
214 /// Notifies of an error. The message doesn't actually need to be of type
215 /// ~str, but I think it does when this eventually uses conditions so it
216 /// might as well start using it now.
217 fn err(&mut self, msg: &str) {
218 self.errors.push(msg.to_owned());
221 /// Optionally consumes the specified character. If the character is not at
222 /// the current position, then the current iterator isn't moved and false is
223 /// returned, otherwise the character is consumed and true is returned.
224 fn consume(&mut self, c: char) -> bool {
225 match self.cur.clone().next() {
226 Some((_, maybe)) if c == maybe => {
230 Some(..) | None => false,
234 /// Forces consumption of the specified character. If the character is not
235 /// found, an error is emitted.
236 fn must_consume(&mut self, c: char) {
238 match self.cur.clone().next() {
239 Some((_, maybe)) if c == maybe => {
242 Some((_, other)) => {
244 format!("expected `{}` but found `{}`", c, other));
248 format!("expected `{}` but string was terminated", c));
253 /// Attempts to consume any amount of whitespace followed by a character
254 fn wsconsume(&mut self, c: char) -> bool {
255 self.ws(); self.consume(c)
258 /// Consumes all whitespace characters until the first non-whitespace
262 match self.cur.clone().next() {
263 Some((_, c)) if char::is_whitespace(c) => { self.cur.next(); }
264 Some(..) | None => { return }
269 /// Consumes an escape sequence, failing if there is not a valid character
271 fn escape(&mut self) -> char {
272 match self.cur.next() {
273 Some((_, c @ '#')) | Some((_, c @ '{')) |
274 Some((_, c @ '\\')) | Some((_, c @ '}')) => { c }
276 self.err(format!("invalid escape character `{}`", c));
280 self.err("expected an escape sequence, but format string was \
287 /// Parses all of a string which is to be considered a "raw literal" in a
288 /// format string. This is everything outside of the braces.
289 fn string(&mut self, start: uint) -> &'a str {
291 // we may not consume the character, so clone the iterator
292 match self.cur.clone().next() {
293 Some((pos, '\\')) | Some((pos, '#')) |
294 Some((pos, '}')) | Some((pos, '{')) => {
295 return self.input.slice(start, pos);
297 Some(..) => { self.cur.next(); }
300 return self.input.slice(start, self.input.len());
306 /// Parses an Argument structure, or what's contained within braces inside
307 /// the format string
308 fn argument(&mut self) -> Argument<'a> {
310 position: self.position(),
311 format: self.format(),
312 method: self.method(),
316 /// Parses a positional argument for a format. This could either be an
317 /// integer index of an argument, a named argument, or a blank string.
318 fn position(&mut self) -> Position<'a> {
319 match self.integer() {
320 Some(i) => { ArgumentIs(i) }
322 match self.cur.clone().next() {
323 Some((_, c)) if char::is_alphabetic(c) => {
324 ArgumentNamed(self.word())
332 /// Parses a format specifier at the current position, returning all of the
333 /// relevant information in the FormatSpec struct.
334 fn format(&mut self) -> FormatSpec<'a> {
335 let mut spec = FormatSpec {
339 precision: CountImplied,
341 ty: self.input.slice(0, 0),
343 if !self.consume(':') { return spec }
346 match self.cur.clone().next() {
348 match self.cur.clone().skip(1).next() {
349 Some((_, '>')) | Some((_, '<')) => {
353 Some(..) | None => {}
359 if self.consume('<') {
360 spec.align = AlignLeft;
361 } else if self.consume('>') {
362 spec.align = AlignRight;
365 if self.consume('+') {
366 spec.flags |= 1 << (FlagSignPlus as uint);
367 } else if self.consume('-') {
368 spec.flags |= 1 << (FlagSignMinus as uint);
371 if self.consume('#') {
372 spec.flags |= 1 << (FlagAlternate as uint);
374 // Width and precision
375 let mut havewidth = false;
376 if self.consume('0') {
377 // small ambiguity with '0$' as a format string. In theory this is a
378 // '0' flag and then an ill-formatted format string with just a '$'
379 // and no count, but this is better if we instead interpret this as
380 // no '0' flag and '0$' as the width instead.
381 if self.consume('$') {
382 spec.width = CountIsParam(0);
385 spec.flags |= 1 << (FlagSignAwareZeroPad as uint);
389 spec.width = self.count();
391 if self.consume('.') {
392 if self.consume('*') {
393 spec.precision = CountIsNextParam;
395 spec.precision = self.count();
398 // Finally the actual format specifier
399 if self.consume('?') {
402 spec.ty = self.word();
407 /// Parses a method to be applied to the previously specified argument and
408 /// its format. The two currently supported methods are 'plural' and 'select'
409 fn method(&mut self) -> Option<~Method<'a>> {
410 if !self.wsconsume(',') {
416 self.must_consume(',');
420 self.must_consume(',');
424 self.err("expected method after comma");
428 self.err(format!("unknown method: `{}`", method));
434 /// Parses a 'select' statement (after the initial 'select' word)
435 fn select(&mut self) -> ~Method<'a> {
436 let mut other = None;
438 // Consume arms one at a time
441 let selector = self.word();
443 self.err("cannot have an empty selector");
446 self.must_consume('{');
448 let pieces = self.collect();
450 self.must_consume('}');
451 if selector == "other" {
452 if !other.is_none() {
// Reported when a second `other` arm is seen in a `select` method.
453 self.err("multiple `other` statements in `select`");
455 other = Some(pieces);
457 arms.push(SelectArm { selector: selector, result: pieces });
460 match self.cur.clone().next() {
461 Some((_, '}')) => { break }
462 Some(..) | None => {}
465 // The "other" selector must be present
466 let other = match other {
469 self.err("`select` statement must provide an `other` case");
476 /// Parses a 'plural' statement (after the initial 'plural' word)
477 fn plural(&mut self) -> ~Method<'a> {
478 let mut offset = None;
479 let mut other = None;
482 // First, attempt to parse the 'offset:' field. We know the set of
483 // selector words which can appear in plural arms, and the only ones
484 // which start with 'o' are "other" and "offset", hence look two
485 // characters deep to see if we can consume the word "offset"
487 let mut it = self.cur.clone();
492 let word = self.word();
493 if word != "offset" {
494 self.err(format!("expected `offset`, found `{}`",
497 self.must_consume(':');
498 match self.integer() {
499 Some(i) => { offset = Some(i); }
501 self.err("offset must be an integer");
506 Some(..) | None => {}
509 Some(..) | None => {}
512 // Next, generate all the arms
514 let mut isother = false;
515 let selector = if self.wsconsume('=') {
516 match self.integer() {
517 Some(i) => Literal(i),
519 self.err("plural `=` selectors must be followed by an \
525 let word = self.word();
527 "other" => { isother = true; Keyword(Zero) }
528 "zero" => Keyword(Zero),
529 "one" => Keyword(One),
530 "two" => Keyword(Two),
531 "few" => Keyword(Few),
532 "many" => Keyword(Many),
534 self.err(format!("unexpected plural selector `{}`",
544 self.must_consume('{');
546 let pieces = self.collect();
548 self.must_consume('}');
550 if !other.is_none() {
// Reported when a second `other` arm is seen in a `plural` method.
551 self.err("multiple `other` statements in `plural`");
553 other = Some(pieces);
555 arms.push(PluralArm { selector: selector, result: pieces });
558 match self.cur.clone().next() {
559 Some((_, '}')) => { break }
560 Some(..) | None => {}
564 let other = match other {
567 self.err("`plural` statement must provide an `other` case");
571 ~Plural(offset, arms, other)
574 /// Parses a Count parameter at the current position. This does not check
575 /// for 'CountIsNextParam' because that is only used in precision, not
577 fn count(&mut self) -> Count<'a> {
578 match self.integer() {
580 if self.consume('$') {
587 let tmp = self.cur.clone();
589 word if word.len() > 0 && self.consume('$') => {
601 /// Parses a word starting at the current position. A word is considered to
602 /// be an alphabetic character followed by any number of alphanumeric
604 fn word(&mut self) -> &'a str {
605 let start = match self.cur.clone().next() {
606 Some((pos, c)) if char::is_XID_start(c) => {
610 Some(..) | None => { return self.input.slice(0, 0); }
614 match self.cur.clone().next() {
615 Some((_, c)) if char::is_XID_continue(c) => {
618 Some((pos, _)) => { end = pos; break }
619 None => { end = self.input.len(); break }
622 self.input.slice(start, end)
625 /// Optionally parses an integer at the current position. This doesn't deal
626 /// with overflow at all, it's just accumulating digits.
627 fn integer(&mut self) -> Option<uint> {
629 let mut found = false;
631 match self.cur.clone().next() {
633 match char::to_digit(c, 10) {
658 fn same(fmt: &'static str, p: ~[Piece<'static>]) {
659 let mut parser = Parser::new(fmt);
660 assert!(p == parser.collect());
663 fn fmtdflt() -> FormatSpec<'static> {
668 precision: CountImplied,
674 fn musterr(s: &str) {
675 let mut p = Parser::new(s);
677 assert!(p.errors.len() != 0);
682 same("asdf", ~[String("asdf")]);
683 same("a\\{b", ~[String("a"), String("{b")]);
684 same("a\\#b", ~[String("a"), String("#b")]);
685 same("a\\}b", ~[String("a"), String("}b")]);
686 same("a\\}", ~[String("a"), String("}")]);
687 same("\\}", ~[String("}")]);
// Each `invalidNN` test hands a malformed format string to `musterr`, which
// asserts that the parser recorded at least one error.
// A lone `{` opens an argument that is never closed.
690 #[test] fn invalid01() { musterr("{") }
// The input is a single backslash with no character after it to escape.
691 #[test] fn invalid02() { musterr("\\") }
// `\a`: `a` is not among the characters `escape` accepts (`#`, `{`, `\`, `}`).
692 #[test] fn invalid03() { musterr("\\a") }
// The position `3` is followed by a stray `a` before the closing brace.
693 #[test] fn invalid04() { musterr("{3a}") }
// NOTE(review): presumably rejected because `|` is not part of any format
// spec -- confirm.
694 #[test] fn invalid05() { musterr("{:|}") }
// NOTE(review): presumably rejected because of the repeated `>` characters
// -- confirm.
695 #[test] fn invalid06() { musterr("{:>>>}") }
698 fn format_nothing() {
699 same("{}", ~[Argument(Argument {
700 position: ArgumentNext,
706 fn format_position() {
707 same("{3}", ~[Argument(Argument {
708 position: ArgumentIs(3),
714 fn format_position_nothing_else() {
715 same("{3:}", ~[Argument(Argument {
716 position: ArgumentIs(3),
723 same("{3:a}", ~[Argument(Argument {
724 position: ArgumentIs(3),
729 precision: CountImplied,
737 fn format_align_fill() {
738 same("{3:>}", ~[Argument(Argument {
739 position: ArgumentIs(3),
744 precision: CountImplied,
750 same("{3:0<}", ~[Argument(Argument {
751 position: ArgumentIs(3),
756 precision: CountImplied,
762 same("{3:*<abcd}", ~[Argument(Argument {
763 position: ArgumentIs(3),
768 precision: CountImplied,
777 same("{:10s}", ~[Argument(Argument {
778 position: ArgumentNext,
783 precision: CountImplied,
789 same("{:10$.10s}", ~[Argument(Argument {
790 position: ArgumentNext,
795 precision: CountIs(10),
796 width: CountIsParam(10),
801 same("{:.*s}", ~[Argument(Argument {
802 position: ArgumentNext,
807 precision: CountIsNextParam,
813 same("{:.10$s}", ~[Argument(Argument {
814 position: ArgumentNext,
819 precision: CountIsParam(10),
825 same("{:a$.b$s}", ~[Argument(Argument {
826 position: ArgumentNext,
831 precision: CountIsName("b"),
832 width: CountIsName("a"),
840 same("{:-}", ~[Argument(Argument {
841 position: ArgumentNext,
845 flags: (1 << FlagSignMinus as uint),
846 precision: CountImplied,
852 same("{:+#}", ~[Argument(Argument {
853 position: ArgumentNext,
857 flags: (1 << FlagSignPlus as uint) | (1 << FlagAlternate as uint),
858 precision: CountImplied,
866 fn format_mixture() {
867 same("abcd {3:a} efg", ~[String("abcd "), Argument(Argument {
868 position: ArgumentIs(3),
873 precision: CountImplied,
878 }), String(" efg")]);
883 same("{, select, other { haha } }", ~[Argument(Argument{
884 position: ArgumentNext,
886 method: Some(~Select(~[], ~[String(" haha ")]))
888 same("{1, select, other { haha } }", ~[Argument(Argument{
889 position: ArgumentIs(1),
891 method: Some(~Select(~[], ~[String(" haha ")]))
893 same("{1, select, other {#} }", ~[Argument(Argument{
894 position: ArgumentIs(1),
896 method: Some(~Select(~[], ~[CurrentArgument]))
898 same("{1, select, other {{2, select, other {lol}}} }", ~[Argument(Argument{
899 position: ArgumentIs(1),
901 method: Some(~Select(~[], ~[Argument(Argument{
902 position: ArgumentIs(2),
904 method: Some(~Select(~[], ~[String("lol")]))
911 same("{1, select, a{1} b{2} c{3} other{4} }", ~[Argument(Argument{
912 position: ArgumentIs(1),
914 method: Some(~Select(~[
915 SelectArm{ selector: "a", result: ~[String("1")] },
916 SelectArm{ selector: "b", result: ~[String("2")] },
917 SelectArm{ selector: "c", result: ~[String("3")] },
// Each `badselectNN` test hands a malformed `select` invocation to `musterr`,
// which asserts that the parser recorded at least one error.
// `select` appears first inside the braces, with nothing after the comma.
922 #[test] fn badselect01() { musterr("{select, }") }
// `select` is followed directly by `}`: no comma and no arms.
923 #[test] fn badselect02() { musterr("{1, select}") }
// There is a comma after `select` but no arms follow it.
924 #[test] fn badselect03() { musterr("{1, select, }") }
// Only an `a` arm is given; there is no `other` arm.
925 #[test] fn badselect04() { musterr("{1, select, a {}}") }
// `other` is not followed by an opening `{`.
926 #[test] fn badselect05() { musterr("{1, select, other }}") }
// The braces are unbalanced: the outer `{` is never closed.
927 #[test] fn badselect06() { musterr("{1, select, other {}") }
// `select` appears first inside the braces and the outer `{` is never closed.
928 #[test] fn badselect07() { musterr("{select, other {}") }
// No comma between the position `1` and `select`.
929 #[test] fn badselect08() { musterr("{1 select, other {}") }
// No comma between the `:d` format spec and `select`.
930 #[test] fn badselect09() { musterr("{:d select, other {}") }
// No comma between `1:d` and `select`.
931 #[test] fn badselect10() { musterr("{1:d select, other {}") }
935 same("{, plural, other { haha } }", ~[Argument(Argument{
936 position: ArgumentNext,
938 method: Some(~Plural(None, ~[], ~[String(" haha ")]))
940 same("{:, plural, other { haha } }", ~[Argument(Argument{
941 position: ArgumentNext,
943 method: Some(~Plural(None, ~[], ~[String(" haha ")]))
945 same("{, plural, offset:1 =2{2} =3{3} many{yes} other{haha} }",
947 position: ArgumentNext,
949 method: Some(~Plural(Some(1), ~[
950 PluralArm{ selector: Literal(2), result: ~[String("2")] },
951 PluralArm{ selector: Literal(3), result: ~[String("3")] },
952 PluralArm{ selector: Keyword(Many), result: ~[String("yes")] }
953 ], ~[String("haha")]))