1 // Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Parsing of format strings
13 //! These structures are used when parsing format strings for the compiler.
14 //! Parsing does not happen at runtime: structures of `std::fmt::rt` are
15 //! generated instead.
22 /// A piece is a portion of the format string which represents the next part
23 /// to emit. These are emitted as a stream by the `Parser` struct.
26 /// A literal string which should directly be emitted
28 /// A back-reference to whatever the current argument is. This is used
29 /// inside of a method call to refer back to the original argument.
31 /// This describes that formatting should process the next argument (as
32 /// specified inside) for emission.
33 Argument(Argument<'a>),
36 /// Representation of an argument specification.
38 pub struct Argument<'a> {
39 /// Where to find this argument
40 pub position: Position<'a>,
41 /// How to format the argument
42 pub format: FormatSpec<'a>,
43 /// If not `None`, what method to invoke on the argument
44 pub method: Option<~Method<'a>>
47 /// Specification for the formatting of an argument in the format string.
49 pub struct FormatSpec<'a> {
50 /// Optionally specified character to fill alignment with
51 pub fill: Option<char>,
52 /// Optionally specified alignment
54 /// Packed version of various flags provided
56 /// The integer precision to use
57 pub precision: Count<'a>,
58 /// The string width requested for the resulting format
60 /// The descriptor string representing the name of the format desired for
61 /// this argument, this can be empty or any number of characters, although
62 /// it is required to be one word.
66 /// Enum describing where an argument for a format can be located.
68 pub enum Position<'a> {
69 /// The argument will be in the next position. This is the default.
71 /// The argument is located at a specific index.
73 /// The argument has a name.
74 ArgumentNamed(&'a str),
77 /// Enum of alignments which are supported.
80 /// The value will be aligned to the left.
82 /// The value will be aligned to the right.
84 /// The value will take on a default alignment.
88 /// Various flags which can be applied to format strings. The meaning of these
89 /// flags is defined by the formatters themselves.
92 /// A `+` will be used to denote positive numbers.
94 /// A `-` will be used to denote negative numbers. This is the default.
96 /// An alternate form will be used for the value. In the case of numbers,
97 /// this means that the number will be prefixed with the supplied string.
99 /// For numbers, this means that the number will be padded with zeroes,
100 /// and the sign (`+` or `-`) will precede them.
101 FlagSignAwareZeroPad,
104 /// A count is used for the precision and width parameters of an integer, and
105 /// can reference either an argument or a literal integer.
108 /// The count is specified explicitly.
110 /// The count is specified by the argument with the given name.
111 CountIsName(&'a str),
112 /// The count is specified by the argument at the given index.
114 /// The count is specified by the next parameter.
116 /// The count is implied and cannot be explicitly specified.
120 /// Enum describing all of the possible methods which the formatting language
121 /// currently supports.
123 pub enum Method<'a> {
124 /// A plural method selects on an integer over a list of either integer or
125 /// keyword-defined clauses. The meaning of the keywords is defined by the
128 /// An offset is optionally present at the beginning which is used to
129 /// match against keywords, but it is not matched against the literal
132 /// The final element of this enum is the default "other" case which is
133 /// always required to be specified.
134 Plural(Option<uint>, Vec<PluralArm<'a>>, Vec<Piece<'a>>),
136 /// A select method selects over a string. Each arm is a different string
137 /// which can be selected for.
139 /// As with `Plural`, a default "other" case is required as well.
140 Select(Vec<SelectArm<'a>>, Vec<Piece<'a>>),
143 /// A selector for what pluralization a plural method should take
144 #[deriving(Eq, TotalEq, Hash)]
145 pub enum PluralSelector {
146 /// One of the plural keywords should be used
147 Keyword(PluralKeyword),
148 /// A literal pluralization should be used
152 /// Structure representing one "arm" of the `plural` function.
154 pub struct PluralArm<'a> {
155 /// A selector can either be specified by a keyword or with an integer
157 pub selector: PluralSelector,
158 /// Array of pieces which are the format of this arm
159 pub result: Vec<Piece<'a>>,
162 /// Enum of the 5 CLDR plural keywords. There is one more, "other", but that
163 /// is specially placed in the `Plural` variant of `Method`.
165 /// http://www.icu-project.org/apiref/icu4c/classicu_1_1PluralRules.html
166 #[deriving(Eq, TotalEq, Hash)]
167 #[allow(missing_doc)]
168 pub enum PluralKeyword {
169 /// The plural form for zero objects.
171 /// The plural form for one object.
173 /// The plural form for two objects.
175 /// The plural form for few objects.
177 /// The plural form for many objects.
181 /// Structure representing one "arm" of the `select` function.
183 pub struct SelectArm<'a> {
184 /// String selector which guards this arm
185 pub selector: &'a str,
186 /// Array of pieces which are the format of this arm
187 pub result: Vec<Piece<'a>>,
190 /// The parser structure for interpreting the input format string. This is
191 /// modelled as an iterator over `Piece` structures to form a stream of tokens
194 /// This is a recursive-descent parser for the sake of simplicity, and if
195 /// necessary there's probably lots of room for improvement performance-wise.
196 pub struct Parser<'a> {
198 cur: str::CharOffsets<'a>,
200 /// Error messages accumulated during parsing
201 pub errors: Vec<~str>,
204 impl<'a> Iterator<Piece<'a>> for Parser<'a> {
205 fn next(&mut self) -> Option<Piece<'a>> {
206 match self.cur.clone().next() {
207 Some((_, '#')) => { self.cur.next(); Some(CurrentArgument) }
210 let ret = Some(Argument(self.argument()));
211 self.must_consume('}');
214 Some((pos, '\\')) => {
216 self.escape(); // ensure it's a valid escape sequence
217 Some(String(self.string(pos + 1))) // skip the '\' character
219 Some((_, '}')) if self.depth == 0 => {
221 self.err("unmatched `}` found");
224 Some((_, '}')) | None => { None }
226 Some(String(self.string(pos)))
232 impl<'a> Parser<'a> {
233 /// Creates a new parser for the given format string
234 pub fn new<'a>(s: &'a str) -> Parser<'a> {
237 cur: s.char_indices(),
243 /// Notifies of an error. The message doesn't actually need to be of type
244 /// ~str, but I think it does when this eventually uses conditions so it
245 /// might as well start using it now.
246 fn err(&mut self, msg: &str) {
247 self.errors.push(msg.to_owned());
250 /// Optionally consumes the specified character. If the character is not at
251 /// the current position, then the current iterator isn't moved and false is
252 /// returned, otherwise the character is consumed and true is returned.
253 fn consume(&mut self, c: char) -> bool {
254 match self.cur.clone().next() {
255 Some((_, maybe)) if c == maybe => {
259 Some(..) | None => false,
263 /// Forces consumption of the specified character. If the character is not
264 /// found, an error is emitted.
265 fn must_consume(&mut self, c: char) {
267 match self.cur.clone().next() {
268 Some((_, maybe)) if c == maybe => {
271 Some((_, other)) => {
273 format!("expected `{}` but found `{}`", c, other));
277 format!("expected `{}` but string was terminated", c));
282 /// Attempts to consume any amount of whitespace followed by a character
283 fn wsconsume(&mut self, c: char) -> bool {
284 self.ws(); self.consume(c)
287 /// Consumes all whitespace characters until the first non-whitespace
291 match self.cur.clone().next() {
292 Some((_, c)) if char::is_whitespace(c) => { self.cur.next(); }
293 Some(..) | None => { return }
298 /// Consumes an escape sequence, failing if there is not a valid character
300 fn escape(&mut self) -> char {
301 match self.cur.next() {
302 Some((_, c @ '#')) | Some((_, c @ '{')) |
303 Some((_, c @ '\\')) | Some((_, c @ '}')) => { c }
305 self.err(format!("invalid escape character `{}`", c));
309 self.err("expected an escape sequence, but format string was \
316 /// Parses all of a string which is to be considered a "raw literal" in a
317 /// format string. This is everything outside of the braces.
318 fn string(&mut self, start: uint) -> &'a str {
320 // we may not consume the character, so clone the iterator
321 match self.cur.clone().next() {
322 Some((pos, '\\')) | Some((pos, '#')) |
323 Some((pos, '}')) | Some((pos, '{')) => {
324 return self.input.slice(start, pos);
326 Some(..) => { self.cur.next(); }
329 return self.input.slice(start, self.input.len());
335 /// Parses an Argument structure, or what's contained within braces inside
336 /// the format string
337 fn argument(&mut self) -> Argument<'a> {
339 position: self.position(),
340 format: self.format(),
341 method: self.method(),
345 /// Parses a positional argument for a format. This could either be an
346 /// integer index of an argument, a named argument, or a blank string.
347 fn position(&mut self) -> Position<'a> {
348 match self.integer() {
349 Some(i) => { ArgumentIs(i) }
351 match self.cur.clone().next() {
352 Some((_, c)) if char::is_alphabetic(c) => {
353 ArgumentNamed(self.word())
361 /// Parses a format specifier at the current position, returning all of the
362 /// relevant information in the FormatSpec struct.
363 fn format(&mut self) -> FormatSpec<'a> {
364 let mut spec = FormatSpec {
368 precision: CountImplied,
370 ty: self.input.slice(0, 0),
372 if !self.consume(':') { return spec }
375 match self.cur.clone().next() {
377 match self.cur.clone().skip(1).next() {
378 Some((_, '>')) | Some((_, '<')) => {
382 Some(..) | None => {}
388 if self.consume('<') {
389 spec.align = AlignLeft;
390 } else if self.consume('>') {
391 spec.align = AlignRight;
394 if self.consume('+') {
395 spec.flags |= 1 << (FlagSignPlus as uint);
396 } else if self.consume('-') {
397 spec.flags |= 1 << (FlagSignMinus as uint);
400 if self.consume('#') {
401 spec.flags |= 1 << (FlagAlternate as uint);
403 // Width and precision
404 let mut havewidth = false;
405 if self.consume('0') {
406 // small ambiguity with '0$' as a format string. In theory this is a
407 // '0' flag and then an ill-formatted format string with just a '$'
408 // and no count, but this is better if we instead interpret this as
409 // no '0' flag and '0$' as the width instead.
410 if self.consume('$') {
411 spec.width = CountIsParam(0);
414 spec.flags |= 1 << (FlagSignAwareZeroPad as uint);
418 spec.width = self.count();
420 if self.consume('.') {
421 if self.consume('*') {
422 spec.precision = CountIsNextParam;
424 spec.precision = self.count();
427 // Finally the actual format specifier
428 if self.consume('?') {
431 spec.ty = self.word();
436 /// Parses a method to be applied to the previously specified argument and
437 /// its format. The two currently supported methods are 'plural' and 'select'
438 fn method(&mut self) -> Option<~Method<'a>> {
439 if !self.wsconsume(',') {
445 self.must_consume(',');
449 self.must_consume(',');
453 self.err("expected method after comma");
457 self.err(format!("unknown method: `{}`", method));
463 /// Parses a 'select' statement (after the initial 'select' word)
///
/// Arms are read one at a time in the form `selector { pieces }`. The arm
/// whose selector is the word `other` is kept apart from the named arms and
/// becomes the second field of the returned `Select`; all remaining arms are
/// pushed onto `arms` in the order they appear. Problems are recorded through
/// `self.err`.
464 fn select(&mut self) -> ~Method<'a> {
465 let mut other = None;
466 let mut arms = vec!();
467 // Consume arms one at a time
470 let selector = self.word();
472 self.err("cannot have an empty selector");
475 self.must_consume('{');
// The parser is itself an iterator of `Piece`s, so the body of the arm is
// gathered by collecting from `self`.
477 let pieces = self.collect();
479 self.must_consume('}');
480 if selector == "other" {
// Only one default arm may be given.
481 if !other.is_none() {
482 self.err("multiple `other` statements in `select`");
484 other = Some(pieces);
486 arms.push(SelectArm { selector: selector, result: pieces });
// Peek (on a clone, so nothing is consumed) for the brace that closes the
// whole argument; that ends the list of arms.
489 match self.cur.clone().next() {
490 Some((_, '}')) => { break }
491 Some(..) | None => {}
494 // The "other" selector must be present
495 let other = match other {
498 self.err("`select` statement must provide an `other` case");
502 box Select(arms, other)
505 /// Parses a 'plural' statement (after the initial 'plural' word)
///
/// An optional `offset:<integer>` prefix is parsed first, followed by the
/// arms. Each arm is introduced either by `=<integer>` (a literal selector)
/// or by one of the keywords `zero`, `one`, `two`, `few`, `many`, `other`,
/// and is followed by `{ pieces }`. The result is
/// `Plural(offset, arms, other)`. Problems are recorded through `self.err`.
506 fn plural(&mut self) -> ~Method<'a> {
507 let mut offset = None;
508 let mut other = None;
509 let mut arms = vec!();
511 // First, attempt to parse the 'offset:' field. We know the set of
512 // selector words which can appear in plural arms, and the only ones
513 // which start with 'o' are "other" and "offset", hence look two
514 // characters deep to see if we can consume the word "offset"
// The lookahead runs on a clone so that `self.cur` is left untouched.
516 let mut it = self.cur.clone();
521 let word = self.word();
522 if word != "offset" {
523 self.err(format!("expected `offset`, found `{}`",
526 self.must_consume(':');
527 match self.integer() {
528 Some(i) => { offset = Some(i); }
530 self.err("offset must be an integer");
535 Some(..) | None => {}
538 Some(..) | None => {}
541 // Next, generate all the arms
543 let mut isother = false;
// `=N` introduces a literal selector; anything else must be a keyword.
544 let selector = if self.wsconsume('=') {
545 match self.integer() {
546 Some(i) => Literal(i),
548 self.err("plural `=` selectors must be followed by an \
554 let word = self.word();
// `other` has no `PluralKeyword` of its own, so `Keyword(Zero)` is only a
// placeholder selector here; `isother` records that this arm is the default.
556 "other" => { isother = true; Keyword(Zero) }
557 "zero" => Keyword(Zero),
558 "one" => Keyword(One),
559 "two" => Keyword(Two),
560 "few" => Keyword(Few),
561 "many" => Keyword(Many),
563 self.err(format!("unexpected plural selector `{}`",
573 self.must_consume('{');
// The parser is itself an iterator of `Piece`s, so the body of the arm is
// gathered by collecting from `self`.
575 let pieces = self.collect();
577 self.must_consume('}');
// Only one default arm may be given.
579 if !other.is_none() {
580 self.err("multiple `other` statements in `plural`");
582 other = Some(pieces);
584 arms.push(PluralArm { selector: selector, result: pieces });
// Peek (on a clone, so nothing is consumed) for the brace that closes the
// whole argument; that ends the list of arms.
587 match self.cur.clone().next() {
588 Some((_, '}')) => { break }
589 Some(..) | None => {}
// The "other" selector must be present
593 let other = match other {
596 self.err("`plural` statement must provide an `other` case");
600 box Plural(offset, arms, other)
603 /// Parses a Count parameter at the current position. This does not check
604 /// for 'CountIsNextParam' because that is only used in precision, not
606 fn count(&mut self) -> Count<'a> {
607 match self.integer() {
609 if self.consume('$') {
616 let tmp = self.cur.clone();
618 word if word.len() > 0 && self.consume('$') => {
630 /// Parses a word starting at the current position. A word is considered to
631 /// be an XID_start character followed by any number of XID_continue
633 fn word(&mut self) -> &'a str {
634 let start = match self.cur.clone().next() {
635 Some((pos, c)) if char::is_XID_start(c) => {
639 Some(..) | None => { return self.input.slice(0, 0); }
643 match self.cur.clone().next() {
644 Some((_, c)) if char::is_XID_continue(c) => {
647 Some((pos, _)) => { end = pos; break }
648 None => { end = self.input.len(); break }
651 self.input.slice(start, end)
654 /// Optionally parses an integer at the current position. This doesn't deal
655 /// with overflow at all, it's just accumulating digits.
656 fn integer(&mut self) -> Option<uint> {
658 let mut found = false;
660 match self.cur.clone().next() {
662 match char::to_digit(c, 10) {
687 fn same(fmt: &'static str, p: &[Piece<'static>]) {
688 let mut parser = Parser::new(fmt);
689 assert!(p == parser.collect::<Vec<Piece<'static>>>().as_slice());
692 fn fmtdflt() -> FormatSpec<'static> {
697 precision: CountImplied,
703 fn musterr(s: &str) {
704 let mut p = Parser::new(s);
706 assert!(p.errors.len() != 0);
711 same("asdf", [String("asdf")]);
712 same("a\\{b", [String("a"), String("{b")]);
713 same("a\\#b", [String("a"), String("#b")]);
714 same("a\\}b", [String("a"), String("}b")]);
715 same("a\\}", [String("a"), String("}")]);
716 same("\\}", [String("}")]);
// Malformed inputs: each string below is handed to `musterr`, which asserts
// that the parser recorded at least one error.
// An opening brace that is never closed.
719 #[test] fn invalid01() { musterr("{") }
// A backslash with nothing after it.
720 #[test] fn invalid02() { musterr("\\") }
// A backslash followed by `a`, which is not one of the escapable characters
// (`#`, `{`, `\`, `}`).
721 #[test] fn invalid03() { musterr("\\a") }
// An integer position immediately followed by a letter.
722 #[test] fn invalid04() { musterr("{3a}") }
// A `|` where the format specification is expected.
723 #[test] fn invalid05() { musterr("{:|}") }
// Three consecutive `>` characters in the format specification.
724 #[test] fn invalid06() { musterr("{:>>>}") }
727 fn format_nothing() {
728 same("{}", [Argument(Argument {
729 position: ArgumentNext,
735 fn format_position() {
736 same("{3}", [Argument(Argument {
737 position: ArgumentIs(3),
743 fn format_position_nothing_else() {
744 same("{3:}", [Argument(Argument {
745 position: ArgumentIs(3),
752 same("{3:a}", [Argument(Argument {
753 position: ArgumentIs(3),
758 precision: CountImplied,
766 fn format_align_fill() {
767 same("{3:>}", [Argument(Argument {
768 position: ArgumentIs(3),
773 precision: CountImplied,
779 same("{3:0<}", [Argument(Argument {
780 position: ArgumentIs(3),
785 precision: CountImplied,
791 same("{3:*<abcd}", [Argument(Argument {
792 position: ArgumentIs(3),
797 precision: CountImplied,
806 same("{:10s}", [Argument(Argument {
807 position: ArgumentNext,
812 precision: CountImplied,
818 same("{:10$.10s}", [Argument(Argument {
819 position: ArgumentNext,
824 precision: CountIs(10),
825 width: CountIsParam(10),
830 same("{:.*s}", [Argument(Argument {
831 position: ArgumentNext,
836 precision: CountIsNextParam,
842 same("{:.10$s}", [Argument(Argument {
843 position: ArgumentNext,
848 precision: CountIsParam(10),
854 same("{:a$.b$s}", [Argument(Argument {
855 position: ArgumentNext,
860 precision: CountIsName("b"),
861 width: CountIsName("a"),
869 same("{:-}", [Argument(Argument {
870 position: ArgumentNext,
874 flags: (1 << FlagSignMinus as uint),
875 precision: CountImplied,
881 same("{:+#}", [Argument(Argument {
882 position: ArgumentNext,
886 flags: (1 << FlagSignPlus as uint) | (1 << FlagAlternate as uint),
887 precision: CountImplied,
895 fn format_mixture() {
896 same("abcd {3:a} efg", [String("abcd "), Argument(Argument {
897 position: ArgumentIs(3),
902 precision: CountImplied,
907 }), String(" efg")]);
912 same("{, select, other { haha } }", [Argument(Argument{
913 position: ArgumentNext,
915 method: Some(box Select(vec![], vec![String(" haha ")]))
917 same("{1, select, other { haha } }", [Argument(Argument{
918 position: ArgumentIs(1),
920 method: Some(box Select(vec![], vec![String(" haha ")]))
922 same("{1, select, other {#} }", [Argument(Argument{
923 position: ArgumentIs(1),
925 method: Some(box Select(vec![], vec![CurrentArgument]))
927 same("{1, select, other {{2, select, other {lol}}} }", [Argument(Argument{
928 position: ArgumentIs(1),
930 method: Some(box Select(vec![], vec![Argument(Argument{
931 position: ArgumentIs(2),
933 method: Some(box Select(vec![], vec![String("lol")]))
940 same("{1, select, a{1} b{2} c{3} other{4} }", [Argument(Argument{
941 position: ArgumentIs(1),
943 method: Some(box Select(vec![
944 SelectArm{ selector: "a", result: vec![String("1")] },
945 SelectArm{ selector: "b", result: vec![String("2")] },
946 SelectArm{ selector: "c", result: vec![String("3")] },
947 ], vec![String("4")]))
// Malformed `select` inputs: each string below is handed to `musterr`, which
// asserts that the parser recorded at least one error.
// `select` written in the argument position, with nothing after the comma.
951 #[test] fn badselect01() { musterr("{select, }") }
// `select` with no comma and no arms after it.
952 #[test] fn badselect02() { musterr("{1, select}") }
// `select,` followed by no arms at all.
953 #[test] fn badselect03() { musterr("{1, select, }") }
// A single arm `a` and no `other` arm.
954 #[test] fn badselect04() { musterr("{1, select, a {}}") }
// `other` without the `{` that should open its body.
955 #[test] fn badselect05() { musterr("{1, select, other }}") }
// The outer argument is never closed.
956 #[test] fn badselect06() { musterr("{1, select, other {}") }
// `select` in the argument position, and the outer argument is never closed.
957 #[test] fn badselect07() { musterr("{select, other {}") }
// No comma between the position `1` and `select`.
958 #[test] fn badselect08() { musterr("{1 select, other {}") }
// No comma between the format spec `:d` and `select`.
959 #[test] fn badselect09() { musterr("{:d select, other {}") }
// No comma between `1:d` and `select`.
960 #[test] fn badselect10() { musterr("{1:d select, other {}") }
964 same("{, plural, other { haha } }", [Argument(Argument{
965 position: ArgumentNext,
967 method: Some(box Plural(None, vec![], vec![String(" haha ")]))
969 same("{:, plural, other { haha } }", [Argument(Argument{
970 position: ArgumentNext,
972 method: Some(box Plural(None, vec![], vec![String(" haha ")]))
974 same("{, plural, offset:1 =2{2} =3{3} many{yes} other{haha} }",
976 position: ArgumentNext,
978 method: Some(box Plural(Some(1), vec![
979 PluralArm{ selector: Literal(2), result: vec![String("2")] },
980 PluralArm{ selector: Literal(3), result: vec![String("3")] },
981 PluralArm{ selector: Keyword(Many), result: vec![String("yes")] }
982 ], vec![String("haha")]))