probe CFG_JAVAC javac
probe CFG_ANTLR4 antlr4
probe CFG_GRUN grun
+probe CFG_FLEX flex
+probe CFG_BISON bison
probe CFG_PANDOC pandoc
probe CFG_PDFLATEX pdflatex
probe CFG_XELATEX xelatex
TARGET_CRATES := libc std flate arena term \
serialize getopts collections test rand \
- log regex graphviz core rbml alloc \
+ log graphviz core rbml alloc \
unicode rustc_bitflags
RUSTC_CRATES := rustc rustc_typeck rustc_borrowck rustc_resolve rustc_driver \
rustc_trans rustc_back rustc_llvm rustc_privacy
DEPS_getopts := std
DEPS_collections := core alloc unicode
DEPS_num := std
-DEPS_test := std getopts serialize rbml term regex native:rust_test_helpers
+DEPS_test := std getopts serialize rbml term native:rust_test_helpers
DEPS_rand := core
-DEPS_log := std regex
-DEPS_regex := std
+DEPS_log := std
DEPS_fmt_macros = std
TOOL_DEPS_compiletest := test getopts
TOOL_DEPS_rustdoc := rustdoc
TOOL_DEPS_rustc := rustc_driver
-TOOL_DEPS_rustbook := std regex rustdoc
+TOOL_DEPS_rustbook := std rustdoc
TOOL_SOURCE_compiletest := $(S)src/compiletest/compiletest.rs
TOOL_SOURCE_rustdoc := $(S)src/driver/driver.rs
TOOL_SOURCE_rustc := $(S)src/driver/driver.rs
$(filter-out rustc_driver, \
$(filter-out rustc_privacy, \
$(filter-out log, \
- $(filter-out regex, \
$(filter-out getopts, \
- $(filter-out syntax, $(CRATES))))))))))))
+ $(filter-out syntax, $(CRATES)))))))))))
COMPILER_DOC_CRATES := rustc rustc_trans rustc_borrowck rustc_resolve \
rustc_typeck rustc_driver syntax rustc_privacy
L = $(B)lib/rustlib/$(CFG_BUILD)/lib
LD = $(CFG_BUILD)/stage2/lib/rustlib/$(CFG_BUILD)/lib/
RUSTC = $(STAGE2_T_$(CFG_BUILD)_H_$(CFG_BUILD))
+# The flex runtime library is named differently per platform:
+# libl (-ll) on apple-darwin, libfl (-lfl) everywhere else.
+ifeq ($(CFG_OSTYPE),apple-darwin)
+ FLEX_LDFLAGS=-ll
+else
+ FLEX_LDFLAGS=-lfl
+endif
# Run the reference lexer against libsyntax and compare the tokens and spans.
# If "// ignore-lexer-test" is present in the file, it will be ignored.
check-lexer:
endif
+
+# Build rules for the reference grammar verifier (flex lexer + bison parser).
+# Generate the C lexer from the flex specification.
+# NOTE(review): $(BG) (a directory) is a normal prerequisite here; a
+# directory's mtime changes whenever its contents change, which can cause
+# spurious regeneration. An order-only prerequisite (`: $(SG)lexer.l | $(BG)`)
+# would be safer — confirm $(BG) is only meant to ensure the dir exists.
+$(BG)lex.yy.c: $(SG)lexer.l $(BG)
+ @$(call E, flex: $@)
+ $(Q)$(CFG_FLEX) -o $@ $<
+
+# Compile the generated lexer; force-include the bison-generated token
+# definitions so the lexer's `return TOKEN;` actions resolve.
+$(BG)lexer-lalr.o: $(BG)lex.yy.c $(BG)parser-lalr.tab.h
+ @$(call E, cc: $@)
+ $(Q)$(CFG_CC) -include $(BG)parser-lalr.tab.h -c -o $@ $<
+
+# Generate the parser (.tab.c) and the token header (.tab.h) from the
+# grammar; --name-prefix=rs renames yyparse/yyerror/yylex to rs*.
+# NOTE(review): a rule header with two targets defines two independent
+# rules, so under `make -j` bison may run twice (racing on its outputs);
+# a stamp file, or grouped targets `&:` on GNU make >= 4.3, would be
+# parallel-safe.
+$(BG)parser-lalr.tab.c $(BG)parser-lalr.tab.h: $(SG)parser-lalr.y
+ @$(call E, bison: $@)
+ $(Q)$(CFG_BISON) $< --output=$(BG)parser-lalr.tab.c --defines=$(BG)parser-lalr.tab.h \
+ --name-prefix=rs --warnings=error=all
+
+# Compile the generated parser.
+$(BG)parser-lalr.o: $(BG)parser-lalr.tab.c
+ @$(call E, cc: $@)
+ $(Q)$(CFG_CC) -c -o $@ $<
+
+# Compile the hand-written driver (needs C99: declarations inside for).
+$(BG)parser-lalr-main.o: $(SG)parser-lalr-main.c
+ @$(call E, cc: $@)
+ $(Q)$(CFG_CC) -std=c99 -c -o $@ $<
+
+# Link the standalone parser binary against the flex runtime
+# ($(FLEX_LDFLAGS): -ll on darwin, -lfl elsewhere).
+$(BG)parser-lalr: $(BG)parser-lalr.o $(BG)parser-lalr-main.o $(BG)lexer-lalr.o
+ @$(call E, cc: $@)
+ $(Q)$(CFG_CC) -o $@ $^ $(FLEX_LDFLAGS)
+
+
+# Only run the grammar check when the configure probes above found both
+# flex and bison; otherwise define a stub target and print why.
+ifdef CFG_FLEX
+ifdef CFG_BISON
+# Drive the model parser over the in-tree sources via testparser.py.
+# NOTE(review): check-grammar names a command, not a file — it should be
+# declared .PHONY (possibly already done elsewhere in the makefiles;
+# confirm), or a stray file named "check-grammar" will mask it.
+check-grammar: $(BG) $(BG)parser-lalr
+ $(info Verifying grammar ...)
+ $(SG)testparser.py -p $(BG)parser-lalr -s $(S)src
+
+else
+$(info cfg: bison not available, skipping parser test...)
+check-grammar:
+
+endif
+else
+$(info cfg: flex not available, skipping parser test...)
+check-grammar:
+
+endif
use std::fmt;
use std::str::FromStr;
-use regex::Regex;
#[derive(Clone, PartialEq, Debug)]
pub enum Mode {
pub run_ignored: bool,
// Only run tests that match this filter
- pub filter: Option<Regex>,
-
- // Precompiled regex for finding expected errors in cfail
- pub cfail_regex: Regex,
+ pub filter: Option<String>,
// Write out a parseable log of tests that were run
pub logfile: Option<Path>,
#[macro_use]
extern crate log;
-extern crate regex;
use std::os;
use std::io;
use common::Config;
use common::{Pretty, DebugInfoGdb, DebugInfoLldb, Codegen};
use util::logv;
-use regex::Regex;
pub mod procsrv;
pub mod util;
}
let filter = if !matches.free.is_empty() {
- let s = matches.free[0].as_slice();
- match regex::Regex::new(s) {
- Ok(re) => Some(re),
- Err(e) => {
- println!("failed to parse filter /{}/: {:?}", s, e);
- panic!()
- }
- }
+ Some(matches.free[0].clone())
} else {
None
};
.as_slice()).expect("invalid mode"),
run_ignored: matches.opt_present("ignored"),
filter: filter,
- cfail_regex: Regex::new(errors::EXPECTED_PATTERN).unwrap(),
logfile: matches.opt_str("logfile").map(|s| Path::new(s)),
runtool: matches.opt_str("runtool"),
host_rustcflags: matches.opt_str("host-rustcflags"),
if full_version_line.as_slice().trim().len() > 0 => {
let full_version_line = full_version_line.as_slice().trim();
- let re = Regex::new(r"(^|[^0-9])([0-9]\.[0-9])([^0-9]|$)").unwrap();
-
- match re.captures(full_version_line) {
- Some(captures) => {
- Some(captures.at(2).unwrap_or("").to_string())
+ // used to be a regex "(^|[^0-9])([0-9]\.[0-9])([^0-9]|$)"
+ for (pos, c) in full_version_line.char_indices() {
+ if !c.is_digit(10) { continue }
+ if pos + 2 >= full_version_line.len() { continue }
+ if full_version_line.char_at(pos + 1) != '.' { continue }
+ if !full_version_line.char_at(pos + 2).is_digit(10) { continue }
+ if pos > 0 && full_version_line.char_at_reverse(pos).is_digit(10) {
+ continue
}
- None => {
- println!("Could not extract GDB version from line '{}'",
- full_version_line);
- None
+ if pos + 3 < full_version_line.len() &&
+ full_version_line.char_at(pos + 3).is_digit(10) {
+ continue
}
+ return Some(full_version_line[pos..pos+3].to_string());
}
+ println!("Could not extract GDB version from line '{}'",
+ full_version_line);
+ None
},
_ => None
}
if full_version_line.as_slice().trim().len() > 0 => {
let full_version_line = full_version_line.as_slice().trim();
- let re = Regex::new(r"[Ll][Ll][Dd][Bb]-([0-9]+)").unwrap();
-
- match re.captures(full_version_line) {
- Some(captures) => {
- Some(captures.at(1).unwrap_or("").to_string())
- }
- None => {
- println!("Could not extract LLDB version from line '{}'",
- full_version_line);
- None
- }
+ for (pos, l) in full_version_line.char_indices() {
+ if l != 'l' && l != 'L' { continue }
+ if pos + 5 >= full_version_line.len() { continue }
+ let l = full_version_line.char_at(pos + 1);
+ if l != 'l' && l != 'L' { continue }
+ let d = full_version_line.char_at(pos + 2);
+ if d != 'd' && d != 'D' { continue }
+ let b = full_version_line.char_at(pos + 3);
+ if b != 'b' && b != 'B' { continue }
+ let dash = full_version_line.char_at(pos + 4);
+ if dash != '-' { continue }
+
+ let vers = full_version_line[pos + 5..].chars().take_while(|c| {
+ c.is_digit(10)
+ }).collect::<String>();
+ if vers.len() > 0 { return Some(vers) }
}
+ println!("Could not extract LLDB version from line '{}'",
+ full_version_line);
+ None
},
_ => None
}
// except according to those terms.
use self::WhichLine::*;
-use std::ascii::AsciiExt;
use std::io::{BufferedReader, File};
-use regex::Regex;
pub struct ExpectedError {
pub line: uint,
pub msg: String,
}
+#[derive(PartialEq, Show)]
+enum WhichLine { ThisLine, FollowPrevious(uint), AdjustBackward(uint) }
+
/// Looks for either "//~| KIND MESSAGE" or "//~^^... KIND MESSAGE"
/// The former is a "follow" that inherits its target from the preceding line;
/// the latter is an "adjusts" that goes that many lines up.
/// Goal is to enable tests both like: //~^^^ ERROR go up three
/// and also //~^ ERROR message one for the preceding line, and
/// //~| ERROR message two for that same line.
-
-pub static EXPECTED_PATTERN : &'static str =
- r"//~(?P<follow>\|)?(?P<adjusts>\^*)\s*(?P<kind>\S*)\s*(?P<msg>.*)";
-
-#[derive(PartialEq, Show)]
-enum WhichLine { ThisLine, FollowPrevious(uint), AdjustBackward(uint) }
-
// Load any test directives embedded in the file
-pub fn load_errors(re: &Regex, testfile: &Path) -> Vec<ExpectedError> {
+pub fn load_errors(testfile: &Path) -> Vec<ExpectedError> {
let mut rdr = BufferedReader::new(File::open(testfile).unwrap());
// `last_nonfollow_error` tracks the most recently seen
rdr.lines().enumerate().filter_map(|(line_no, ln)| {
parse_expected(last_nonfollow_error,
line_no + 1,
- ln.unwrap().as_slice(), re)
+ ln.unwrap().as_slice())
.map(|(which, error)| {
match which {
FollowPrevious(_) => {}
fn parse_expected(last_nonfollow_error: Option<uint>,
line_num: uint,
- line: &str,
- re: &Regex) -> Option<(WhichLine, ExpectedError)> {
- re.captures(line).and_then(|caps| {
- let adjusts = caps.name("adjusts").unwrap_or("").len();
- let kind = caps.name("kind").unwrap_or("").to_ascii_lowercase();
- let msg = caps.name("msg").unwrap_or("").trim().to_string();
- let follow = caps.name("follow").unwrap_or("").len() > 0;
+ line: &str) -> Option<(WhichLine, ExpectedError)> {
+ let start = match line.find_str("//~") { Some(i) => i, None => return None };
+ let (follow, adjusts) = if line.char_at(start + 3) == '|' {
+ (true, 0)
+ } else {
+ (false, line[start + 3..].chars().take_while(|c| *c == '^').count())
+ };
+ let kind_start = start + 3 + adjusts + (follow as usize);
+ let letters = line[kind_start..].chars();
+ let kind = letters.skip_while(|c| c.is_whitespace())
+ .take_while(|c| !c.is_whitespace())
+ .map(|c| c.to_lowercase())
+ .collect::<String>();
+ let letters = line[kind_start..].chars();
+ let msg = letters.skip_while(|c| c.is_whitespace())
+ .skip_while(|c| !c.is_whitespace())
+ .collect::<String>().trim().to_string();
- let (which, line) = if follow {
- assert!(adjusts == 0, "use either //~| or //~^, not both.");
- let line = last_nonfollow_error.unwrap_or_else(|| {
- panic!("encountered //~| without preceding //~^ line.")
- });
- (FollowPrevious(line), line)
- } else {
- let which =
- if adjusts > 0 { AdjustBackward(adjusts) } else { ThisLine };
- let line = line_num - adjusts;
- (which, line)
- };
+ let (which, line) = if follow {
+ assert!(adjusts == 0, "use either //~| or //~^, not both.");
+ let line = last_nonfollow_error.unwrap_or_else(|| {
+ panic!("encountered //~| without preceding //~^ line.")
+ });
+ (FollowPrevious(line), line)
+ } else {
+ let which =
+ if adjusts > 0 { AdjustBackward(adjusts) } else { ThisLine };
+ let line = line_num - adjusts;
+ (which, line)
+ };
- debug!("line={} which={:?} kind={:?} msg={:?}", line_num, which, kind, msg);
- Some((which, ExpectedError { line: line,
- kind: kind,
- msg: msg, }))
- })
+ debug!("line={} which={:?} kind={:?} msg={:?}", line_num, which, kind, msg);
+ Some((which, ExpectedError { line: line,
+ kind: kind,
+ msg: msg, }))
}
}
let output_to_check = get_output(props, &proc_res);
- let expected_errors = errors::load_errors(&config.cfail_regex, testfile);
+ let expected_errors = errors::load_errors(testfile);
if !expected_errors.is_empty() {
if !props.error_patterns.is_empty() {
fatal("both error pattern and expected errors specified");
--- /dev/null
+%{
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#include <stdio.h>
+#include <ctype.h>
+
+static int num_hashes;
+static int end_hashes;
+static int saw_non_hash;
+
+%}
+
+%option stack
+%option yylineno
+
+%x str
+%x rawstr
+%x rawstr_esc_begin
+%x rawstr_esc_body
+%x rawstr_esc_end
+%x byte
+%x bytestr
+%x rawbytestr
+%x rawbytestr_nohash
+%x pound
+%x shebang_or_attr
+%x ltorchar
+%x linecomment
+%x doc_line
+%x blockcomment
+%x doc_block
+%x suffix
+
+ident [a-zA-Z\x80-\xff_][a-zA-Z0-9\x80-\xff_]*
+
+%%
+
+<suffix>{ident} { BEGIN(INITIAL); }
+<suffix>(.|\n) { yyless(0); BEGIN(INITIAL); }
+
+[ \n\t\r] { }
+
+\xef\xbb\xbf {
+ // UTF-8 byte order mark (BOM), ignore if in line 1, error otherwise
+ if (yyget_lineno() != 1) {
+ return -1;
+ }
+}
+
+\/\/(\/|\!) { BEGIN(doc_line); yymore(); }
+<doc_line>\n { BEGIN(INITIAL);
+ yyleng--;
+ yytext[yyleng] = 0;
+ return ((yytext[2] == '!') ? INNER_DOC_COMMENT : OUTER_DOC_COMMENT);
+ }
+<doc_line>[^\n]* { yymore(); }
+
+\/\/|\/\/\/\/ { BEGIN(linecomment); }
+<linecomment>\n { BEGIN(INITIAL); }
+<linecomment>[^\n]* { }
+
+\/\*(\*|\!)[^*] { yy_push_state(INITIAL); yy_push_state(doc_block); yymore(); }
+<doc_block>\/\* { yy_push_state(doc_block); yymore(); }
+<doc_block>\*\/ {
+ yy_pop_state();
+ if (yy_top_state() == doc_block) {
+ yymore();
+ } else {
+ return ((yytext[2] == '!') ? INNER_DOC_COMMENT : OUTER_DOC_COMMENT);
+ }
+}
+<doc_block>(.|\n) { yymore(); }
+
+\/\* { yy_push_state(blockcomment); }
+<blockcomment>\/\* { yy_push_state(blockcomment); }
+<blockcomment>\*\/ { yy_pop_state(); }
+<blockcomment>(.|\n) { }
+
+_ { return UNDERSCORE; }
+as { return AS; }
+box { return BOX; }
+break { return BREAK; }
+const { return CONST; }
+continue { return CONTINUE; }
+crate { return CRATE; }
+else { return ELSE; }
+enum { return ENUM; }
+extern { return EXTERN; }
+false { return FALSE; }
+fn { return FN; }
+for { return FOR; }
+if { return IF; }
+impl { return IMPL; }
+in { return IN; }
+let { return LET; }
+loop { return LOOP; }
+match { return MATCH; }
+mod { return MOD; }
+move { return MOVE; }
+mut { return MUT; }
+priv { return PRIV; }
+proc { return PROC; }
+pub { return PUB; }
+ref { return REF; }
+return { return RETURN; }
+self { return SELF; }
+static { return STATIC; }
+struct { return STRUCT; }
+trait { return TRAIT; }
+true { return TRUE; }
+type { return TYPE; }
+typeof { return TYPEOF; }
+unsafe { return UNSAFE; }
+use { return USE; }
+where { return WHERE; }
+while { return WHILE; }
+
+{ident} { return IDENT; }
+
+0x[0-9a-fA-F_]+ { BEGIN(suffix); return LIT_INTEGER; }
+0o[0-8_]+ { BEGIN(suffix); return LIT_INTEGER; }
+0b[01_]+ { BEGIN(suffix); return LIT_INTEGER; }
+[0-9][0-9_]* { BEGIN(suffix); return LIT_INTEGER; }
+[0-9][0-9_]*\.(\.|[a-zA-Z]) { yyless(yyleng - 2); BEGIN(suffix); return LIT_INTEGER; }
+
+[0-9][0-9_]*\.[0-9_]*([eE][-\+]?[0-9_]+)? { BEGIN(suffix); return LIT_FLOAT; }
+[0-9][0-9_]*(\.[0-9_]*)?[eE][-\+]?[0-9_]+ { BEGIN(suffix); return LIT_FLOAT; }
+
+; { return ';'; }
+, { return ','; }
+\.\.\. { return DOTDOTDOT; }
+\.\. { return DOTDOT; }
+\. { return '.'; }
+\( { return '('; }
+\) { return ')'; }
+\{ { return '{'; }
+\} { return '}'; }
+\[ { return '['; }
+\] { return ']'; }
+@ { return '@'; }
+# { BEGIN(pound); yymore(); }
+<pound>\! { BEGIN(shebang_or_attr); yymore(); }
+<shebang_or_attr>\[ {
+ BEGIN(INITIAL);
+ yyless(2);
+ return SHEBANG;
+}
+<shebang_or_attr>[^\[\n]*\n {
+ // Since the \n was eaten as part of the token, yylineno will have
+ // been incremented to the value 2 if the shebang was on the first
+ // line. This yyless undoes that, setting yylineno back to 1.
+ yyless(yyleng - 1);
+ if (yyget_lineno() == 1) {
+ BEGIN(INITIAL);
+ return SHEBANG_LINE;
+ } else {
+ BEGIN(INITIAL);
+ yyless(2);
+ return SHEBANG;
+ }
+}
+<pound>. { BEGIN(INITIAL); yyless(1); return '#'; }
+
+\~ { return '~'; }
+:: { return MOD_SEP; }
+: { return ':'; }
+\$ { return '$'; }
+\? { return '?'; }
+
+== { return EQEQ; }
+=> { return FAT_ARROW; }
+= { return '='; }
+\!= { return NE; }
+\! { return '!'; }
+\<= { return LE; }
+\<\< { return SHL; }
+\<\<= { return SHLEQ; }
+\< { return '<'; }
+\>= { return GE; }
+\>\> { return SHR; }
+\>\>= { return SHREQ; }
+\> { return '>'; }
+
+\x27 { BEGIN(ltorchar); yymore(); }
+<ltorchar>static { BEGIN(INITIAL); return STATIC_LIFETIME; }
+<ltorchar>{ident} { BEGIN(INITIAL); return LIFETIME; }
+<ltorchar>\\[nrt\\\x27\x220]\x27 { BEGIN(suffix); return LIT_CHAR; }
+<ltorchar>\\x[0-9a-fA-F]{2}\x27 { BEGIN(suffix); return LIT_CHAR; }
+<ltorchar>\\u\{[0-9a-fA-F]?{6}\}\x27 { BEGIN(suffix); return LIT_CHAR; }
+<ltorchar>.\x27 { BEGIN(suffix); return LIT_CHAR; }
+<ltorchar>[\x80-\xff]{2,4}\x27 { BEGIN(suffix); return LIT_CHAR; }
+<ltorchar><<EOF>> { BEGIN(INITIAL); return -1; }
+
+b\x22 { BEGIN(bytestr); yymore(); }
+<bytestr>\x22 { BEGIN(suffix); return LIT_BINARY; }
+
+<bytestr><<EOF>> { return -1; }
+<bytestr>\\[n\nrt\\\x27\x220] { yymore(); }
+<bytestr>\\x[0-9a-fA-F]{2} { yymore(); }
+<bytestr>\\u\{[0-9a-fA-F]?{6}\} { yymore(); }
+<bytestr>\\[^n\nrt\\\x27\x220] { return -1; }
+<bytestr>(.|\n) { yymore(); }
+
+br\x22 { BEGIN(rawbytestr_nohash); yymore(); }
+<rawbytestr_nohash>\x22 { BEGIN(suffix); return LIT_BINARY_RAW; }
+<rawbytestr_nohash>(.|\n) { yymore(); }
+<rawbytestr_nohash><<EOF>> { return -1; }
+
+br/# {
+ BEGIN(rawbytestr);
+ yymore();
+ num_hashes = 0;
+ saw_non_hash = 0;
+ end_hashes = 0;
+}
+<rawbytestr># {
+ if (!saw_non_hash) {
+ num_hashes++;
+ } else if (end_hashes != 0) {
+ end_hashes++;
+ if (end_hashes == num_hashes) {
+ BEGIN(INITIAL);
+ return LIT_BINARY_RAW;
+ }
+ }
+ yymore();
+}
+<rawbytestr>\x22# {
+ end_hashes = 1;
+ if (end_hashes == num_hashes) {
+ BEGIN(INITIAL);
+ return LIT_BINARY_RAW;
+ }
+ yymore();
+}
+<rawbytestr>(.|\n) {
+ if (!saw_non_hash) {
+ saw_non_hash = 1;
+ }
+ if (end_hashes != 0) {
+ end_hashes = 0;
+ }
+ yymore();
+}
+<rawbytestr><<EOF>> { return -1; }
+
+b\x27 { BEGIN(byte); yymore(); }
+<byte>\\[nrt\\\x27\x220]\x27 { BEGIN(INITIAL); return LIT_BYTE; }
+<byte>\\x[0-9a-fA-F]{2}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
+<byte>\\u[0-9a-fA-F]{4}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
+<byte>\\U[0-9a-fA-F]{8}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
+<byte>.\x27 { BEGIN(INITIAL); return LIT_BYTE; }
+<byte><<EOF>> { BEGIN(INITIAL); return -1; }
+
+r\x22 { BEGIN(rawstr); yymore(); }
+<rawstr>\x22 { BEGIN(suffix); return LIT_STR_RAW; }
+<rawstr>(.|\n) { yymore(); }
+<rawstr><<EOF>> { return -1; }
+
+r/# {
+ BEGIN(rawstr_esc_begin);
+ yymore();
+ num_hashes = 0;
+ saw_non_hash = 0;
+ end_hashes = 0;
+}
+
+<rawstr_esc_begin># {
+ num_hashes++;
+ yymore();
+}
+<rawstr_esc_begin>\x22 {
+ BEGIN(rawstr_esc_body);
+ yymore();
+}
+<rawstr_esc_begin>(.|\n) { return -1; }
+
+<rawstr_esc_body>\x22/# {
+ BEGIN(rawstr_esc_end);
+ yymore();
+ }
+<rawstr_esc_body>(.|\n) {
+ yymore();
+ }
+
+<rawstr_esc_end># {
+ end_hashes++;
+ if (end_hashes == num_hashes) {
+ BEGIN(INITIAL);
+ return LIT_STR_RAW;
+ }
+ yymore();
+ }
+<rawstr_esc_end>[^#] {
+ end_hashes = 0;
+ BEGIN(rawstr_esc_body);
+ yymore();
+ }
+
+<rawstr_esc_begin,rawstr_esc_body,rawstr_esc_end><<EOF>> { return -1; }
+
+\x22 { BEGIN(str); yymore(); }
+<str>\x22 { BEGIN(suffix); return LIT_STR; }
+
+<str><<EOF>> { return -1; }
+<str>\\[n\nrt\\\x27\x220] { yymore(); }
+<str>\\x[0-9a-fA-F]{2} { yymore(); }
+<str>\\u\{[0-9a-fA-F]?{6}\} { yymore(); }
+<str>\\[^n\nrt\\\x27\x220] { return -1; }
+<str>(.|\n) { yymore(); }
+
+-\> { return RARROW; }
+- { return '-'; }
+-= { return MINUSEQ; }
+&& { return ANDAND; }
+& { return '&'; }
+&= { return ANDEQ; }
+\|\| { return OROR; }
+\| { return '|'; }
+\|= { return OREQ; }
+\+ { return '+'; }
+\+= { return PLUSEQ; }
+\* { return '*'; }
+\*= { return STAREQ; }
+\/ { return '/'; }
+\/= { return SLASHEQ; }
+\^ { return '^'; }
+\^= { return CARETEQ; }
+% { return '%'; }
+%= { return PERCENTEQ; }
+
+<<EOF>> { return 0; }
+
+%%
--- /dev/null
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+extern int yylex();
+extern int rsparse();
+
+#define PUSHBACK_LEN 4
+
+static char pushback[PUSHBACK_LEN];
+static int verbose;
+
+// printf-style trace helper: forwards to vprintf only when the global
+// `verbose` flag is set (enabled by passing "-v" to main, below).
+void print(const char* format, ...) {
+ va_list args;
+ va_start(args, format);
+ if (verbose) {
+ vprintf(format, args);
+ }
+ va_end(args);
+}
+
+// If there is a non-null char at the head of the pushback queue,
+// dequeue it and shift the rest of the queue forwards. Otherwise,
+// return the token from calling yylex.
+// This is the token source the bison parser calls (it was generated with
+// --name-prefix=rs, so it invokes rslex rather than yylex). A pushed-back
+// character is returned as its raw char value, matching bison's
+// convention for single-character tokens like '.' or '<'.
+int rslex() {
+ if (pushback[0] == '\0') {
+ return yylex();
+ } else {
+ char c = pushback[0];
+ memmove(pushback, pushback + 1, PUSHBACK_LEN - 1);
+ pushback[PUSHBACK_LEN - 1] = '\0';
+ return c;
+ }
+}
+
+// Queue one character so rslex() returns it ahead of the next yylex()
+// token (declared extern in parser-lalr.y for use by grammar actions).
+// Note: this does nothing if the pushback queue is full. As long as
+// there aren't more than PUSHBACK_LEN consecutive calls to push_back
+// in an action, this shouldn't be a problem.
+void push_back(char c) {
+ for (int i = 0; i < PUSHBACK_LEN; ++i) {
+ if (pushback[i] == '\0') {
+ pushback[i] = c;
+ break;
+ }
+ }
+}
+
+extern int rsdebug;
+
+struct node {
+ struct node *next;
+ struct node *prev;
+ int own_string;
+ char const *name;
+ int n_elems;
+ struct node *elems[];
+};
+
+struct node *nodes = NULL;
+int n_nodes;
+
+// Allocate an n-ary AST node named `name` with n child nodes passed as
+// varargs (flexible array member `elems` sized accordingly). Every node
+// is threaded onto the head of the global doubly-linked `nodes` list so
+// main() can later print and free the whole set.
+// own_string = 0: `name` is expected to outlive the node (e.g. a string
+// literal) and is not freed by the cleanup loop.
+// NOTE(review): the malloc result is used without a NULL check.
+struct node *mk_node(char const *name, int n, ...) {
+ va_list ap;
+ int i = 0;
+ unsigned sz = sizeof(struct node) + (n * sizeof(struct node *));
+ struct node *nn, *nd = (struct node *)malloc(sz);
+
+ print("# New %d-ary node: %s = %p\n", n, name, nd);
+
+ nd->own_string = 0;
+ nd->prev = NULL;
+ nd->next = nodes;
+ if (nodes) {
+ nodes->prev = nd;
+ }
+ nodes = nd;
+
+ nd->name = name;
+ nd->n_elems = n;
+
+ va_start(ap, n);
+ while (i < n) {
+ nn = va_arg(ap, struct node *);
+ print("# arg[%d]: %p\n", i, nn);
+ print("# (%s ...)\n", nn->name);
+ nd->elems[i++] = nn;
+ }
+ va_end(ap);
+ n_nodes++;
+ return nd;
+}
+
+// Leaf node whose name is a private strdup'd copy of `name` (typically
+// yytext); own_string = 1 tells the cleanup loop in main() to free it.
+struct node *mk_atom(char *name) {
+ struct node *nd = mk_node((char const *)strdup(name), 0);
+ nd->own_string = 1;
+ return nd;
+}
+
+// Placeholder leaf used by grammar rules for absent optional elements.
+struct node *mk_none() {
+ return mk_atom("<none>");
+}
+
+// Grow node `nd` by n additional children (varargs): unlink it from the
+// global list, realloc it to hold `c` total children, and re-thread it
+// at the head of `nodes`. Returns the (possibly moved) node.
+// NOTE(review): the trace below says "by %d nodes" but passes `c`, the
+// new total, not `n`, the number added.
+// NOTE(review): `nodes->prev = nd` assumes the list is non-empty and
+// that `nd` was not itself the head — if it were, `nodes` would still
+// hold the pre-realloc address (a stale pointer). In the visible grammar
+// actions ext_node is only applied to older nodes, so this looks
+// unreachable in practice, but a guard would make it robust. realloc's
+// result is also used unchecked.
+struct node *ext_node(struct node *nd, int n, ...) {
+ va_list ap;
+ int i = 0, c = nd->n_elems + n;
+ unsigned sz = sizeof(struct node) + (c * sizeof(struct node *));
+ struct node *nn;
+
+ print("# Extending %d-ary node by %d nodes: %s = %p",
+ nd->n_elems, c, nd->name, nd);
+
+ if (nd->next) {
+ nd->next->prev = nd->prev;
+ }
+ if (nd->prev) {
+ nd->prev->next = nd->next;
+ }
+ nd = realloc(nd, sz);
+ nd->prev = NULL;
+ nd->next = nodes;
+ nodes->prev = nd;
+ nodes = nd;
+
+ print(" ==> %p\n", nd);
+
+ va_start(ap, n);
+ while (i < n) {
+ nn = va_arg(ap, struct node *);
+ print("# arg[%d]: %p\n", i, nn);
+ print("# (%s ...)\n", nn->name);
+ nd->elems[nd->n_elems++] = nn;
+ ++i;
+ }
+ va_end(ap);
+ return nd;
+}
+
+int const indent_step = 4;
+
+// Emit `depth` columns of indentation, printing '|' whenever the
+// remaining depth is a multiple of indent_step and ' ' otherwise
+// (depth-- tests the pre-decrement value), so nesting levels line up
+// visually in the tree dump.
+void print_indent(int depth) {
+ while (depth) {
+ if (depth-- % indent_step == 0) {
+ print("|");
+ } else {
+ print(" ");
+ }
+ }
+}
+
+// Recursively pretty-print the tree rooted at `n` as an indented
+// S-expression: childless nodes as "name", interior nodes as
+// "(name ...)" with children indented by indent_step.
+void print_node(struct node *n, int depth) {
+ int i = 0;
+ print_indent(depth);
+ if (n->n_elems == 0) {
+ print("%s\n", n->name);
+ } else {
+ print("(%s\n", n->name);
+ for (i = 0; i < n->n_elems; ++i) {
+ print_node(n->elems[i], depth + indent_step);
+ }
+ print_indent(depth);
+ print(")\n");
+ }
+}
+
+// Entry point: "-v" as the sole argument enables trace output; then run
+// the bison-generated parser (rsparse; flex reads stdin by default).
+// Afterwards print the tree rooted at the head of the global node list
+// (the most recently allocated node) and free every node — names too,
+// when own_string was set by mk_atom. Returns rsparse's status
+// (0 = accepted).
+int main(int argc, char **argv) {
+ if (argc == 2 && strcmp(argv[1], "-v") == 0) {
+ verbose = 1;
+ } else {
+ verbose = 0;
+ }
+ int ret = 0;
+ struct node *tmp;
+ memset(pushback, '\0', PUSHBACK_LEN);
+ ret = rsparse();
+ print("--- PARSE COMPLETE: ret:%d, n_nodes:%d ---\n", ret, n_nodes);
+ if (nodes) {
+ print_node(nodes, 0);
+ }
+ while (nodes) {
+ tmp = nodes;
+ nodes = tmp->next;
+ if (tmp->own_string) {
+ free((void*)tmp->name);
+ }
+ free(tmp);
+ }
+ return ret;
+}
+
+// Error callback invoked by the bison parser on syntax errors (yyerror
+// renamed to rserror by --name-prefix=rs); reports the message to stderr.
+void rserror(char const *s) {
+ fprintf(stderr, "%s\n", s);
+}
--- /dev/null
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+%{
+#define YYERROR_VERBOSE
+#define YYSTYPE struct node *
+struct node;
+extern int yylex();
+extern void yyerror(char const *s);
+extern struct node *mk_node(char const *name, int n, ...);
+extern struct node *mk_atom(char *text);
+extern struct node *mk_none();
+extern struct node *ext_node(struct node *nd, int n, ...);
+extern void push_back(char c);
+extern char *yytext;
+%}
+%debug
+
+%token SHL
+%token SHR
+%token LE
+%token EQEQ
+%token NE
+%token GE
+%token ANDAND
+%token OROR
+%token SHLEQ
+%token SHREQ
+%token MINUSEQ
+%token ANDEQ
+%token OREQ
+%token PLUSEQ
+%token STAREQ
+%token SLASHEQ
+%token CARETEQ
+%token PERCENTEQ
+%token DOTDOT
+%token DOTDOTDOT
+%token MOD_SEP
+%token RARROW
+%token FAT_ARROW
+%token LIT_BYTE
+%token LIT_CHAR
+%token LIT_INTEGER
+%token LIT_FLOAT
+%token LIT_STR
+%token LIT_STR_RAW
+%token LIT_BINARY
+%token LIT_BINARY_RAW
+%token IDENT
+%token UNDERSCORE
+%token LIFETIME
+
+// keywords
+%token SELF
+%token STATIC
+%token AS
+%token BREAK
+%token CRATE
+%token ELSE
+%token ENUM
+%token EXTERN
+%token FALSE
+%token FN
+%token FOR
+%token IF
+%token IMPL
+%token IN
+%token LET
+%token LOOP
+%token MATCH
+%token MOD
+%token MOVE
+%token MUT
+%token PRIV
+%token PUB
+%token REF
+%token RETURN
+%token STRUCT
+%token TRUE
+%token TRAIT
+%token TYPE
+%token UNSAFE
+%token USE
+%token WHILE
+%token CONTINUE
+%token PROC
+%token BOX
+%token CONST
+%token WHERE
+%token TYPEOF
+%token INNER_DOC_COMMENT
+%token OUTER_DOC_COMMENT
+
+%token SHEBANG
+%token SHEBANG_LINE
+%token STATIC_LIFETIME
+
+ /*
+ Quoting from the Bison manual:
+
+ "Finally, the resolution of conflicts works by comparing the precedence
+ of the rule being considered with that of the lookahead token. If the
+ token's precedence is higher, the choice is to shift. If the rule's
+ precedence is higher, the choice is to reduce. If they have equal
+ precedence, the choice is made based on the associativity of that
+ precedence level. The verbose output file made by ‘-v’ (see Invoking
+ Bison) says how each conflict was resolved"
+ */
+
+// We expect no shift/reduce or reduce/reduce conflicts in this grammar;
+// all potential ambiguities are scrutinized and eliminated manually.
+%expect 0
+
+// fake-precedence symbol to cause '|' bars in lambda context to parse
+// at low precedence, permit things like |x| foo = bar, where '=' is
+// otherwise lower-precedence than '|'. Also used for proc() to cause
+// things like proc() a + b to parse as proc() { a + b }.
+%precedence LAMBDA
+
+%precedence SELF
+
+// MUT should be lower precedence than IDENT so that in the pat rule,
+// "& MUT pat" has higher precedence than "binding_mode ident [@ pat]"
+%precedence MUT
+
+// IDENT needs to be lower than '{' so that 'foo {' is shifted when
+// trying to decide if we've got a struct-construction expr (esp. in
+// contexts like 'if foo { .')
+//
+// IDENT also needs to be lower precedence than '<' so that '<' in
+// 'foo:bar . <' is shifted (in a trait reference occurring in a
+// bounds list), parsing as foo:(bar<baz>) rather than (foo:bar)<baz>.
+%precedence IDENT
+
+// A couple fake-precedence symbols to use in rules associated with +
+// and < in trailing type contexts. These come up when you have a type
+// in the RHS of operator-AS, such as "foo as bar<baz>". The "<" there
+// has to be shifted so the parser keeps trying to parse a type, even
+// though it might well consider reducing the type "bar" and then
+// going on to "<" as a subsequent binop. The "+" case is with
+// trailing type-bounds ("foo as bar:A+B"), for the same reason.
+%precedence SHIFTPLUS
+
+%precedence MOD_SEP
+%precedence RARROW ':'
+
+// Binops & unops, and their precedences
+%precedence BOX
+%precedence BOXPLACE
+%nonassoc DOTDOT
+
+// RETURN needs to be lower-precedence than tokens that start
+// prefix_exprs
+%precedence RETURN
+
+%left '=' SHLEQ SHREQ MINUSEQ ANDEQ OREQ PLUSEQ STAREQ SLASHEQ CARETEQ PERCENTEQ
+%left OROR
+%left ANDAND
+%left EQEQ NE
+%left '<' '>' LE GE
+%left '|'
+%left '^'
+%left '&'
+%left SHL SHR
+%left '+' '-'
+%precedence AS
+%left '*' '/' '%'
+%precedence '!'
+
+%precedence '{' '[' '(' '.'
+
+%start crate
+
+%%
+
+////////////////////////////////////////////////////////////////////////
+// Part 1: Items and attributes
+////////////////////////////////////////////////////////////////////////
+
+crate
+: maybe_shebang inner_attrs maybe_mod_items { mk_node("crate", 2, $2, $3); }
+| maybe_shebang maybe_mod_items { mk_node("crate", 1, $2); }
+;
+
+maybe_shebang
+: SHEBANG_LINE
+| %empty
+;
+
+maybe_inner_attrs
+: inner_attrs
+| %empty { $$ = mk_none(); }
+;
+
+inner_attrs
+: inner_attr { $$ = mk_node("InnerAttrs", 1, $1); }
+| inner_attrs inner_attr { $$ = ext_node($1, 1, $2); }
+;
+
+inner_attr
+: SHEBANG '[' meta_item ']' { $$ = mk_node("InnerAttr", 1, $3); }
+| INNER_DOC_COMMENT { $$ = mk_node("InnerAttr", 1, mk_node("doc-comment", 1, mk_atom(yytext))); }
+;
+
+maybe_outer_attrs
+: outer_attrs
+| %empty { $$ = mk_none(); }
+;
+
+outer_attrs
+: outer_attr { $$ = mk_node("OuterAttrs", 1, $1); }
+| outer_attrs outer_attr { $$ = ext_node($1, 1, $2); }
+;
+
+outer_attr
+: '#' '[' meta_item ']' { $$ = $3; }
+| OUTER_DOC_COMMENT { $$ = mk_node("doc-comment", 1, mk_atom(yytext)); }
+;
+
+meta_item
+: ident { $$ = mk_node("MetaWord", 1, $1); }
+| ident '=' lit { $$ = mk_node("MetaNameValue", 2, $1, $3); }
+| ident '(' meta_seq ')' { $$ = mk_node("MetaList", 2, $1, $3); }
+| ident '(' meta_seq ',' ')' { $$ = mk_node("MetaList", 2, $1, $3); }
+;
+
+meta_seq
+: %empty { $$ = mk_none(); }
+| meta_item { $$ = mk_node("MetaItems", 1, $1); }
+| meta_seq ',' meta_item { $$ = ext_node($1, 1, $3); }
+;
+
+maybe_mod_items
+: mod_items
+| %empty { $$ = mk_none(); }
+;
+
+mod_items
+: mod_item { $$ = mk_node("Items", 1, $1); }
+| mod_items mod_item { $$ = ext_node($1, 1, $2); }
+;
+
+attrs_and_vis
+: maybe_outer_attrs visibility { $$ = mk_node("AttrsAndVis", 2, $1, $2); }
+;
+
+mod_item
+: attrs_and_vis item { $$ = mk_node("Item", 2, $1, $2); }
+;
+
+// items that can appear outside of a fn block
+item
+: item_static
+| item_const
+| item_type
+| block_item
+| view_item
+| item_macro
+;
+
+// items that can appear in "stmts"
+stmt_item
+: item_static
+| item_const
+| item_type
+| block_item
+| use_item
+| extern_fn_item
+;
+
+item_static
+: STATIC ident ':' ty '=' expr ';' { $$ = mk_node("ItemStatic", 3, $2, $4, $6); }
+| STATIC MUT ident ':' ty '=' expr ';' { $$ = mk_node("ItemStatic", 3, $3, $5, $7); }
+;
+
+item_const
+: CONST ident ':' ty '=' expr ';' { $$ = mk_node("ItemConst", 3, $2, $4, $6); }
+;
+
+item_macro
+: path_expr '!' maybe_ident parens_delimited_token_trees ';'
+| path_expr '!' maybe_ident braces_delimited_token_trees
+| path_expr '!' maybe_ident brackets_delimited_token_trees ';'
+;
+
+view_item
+: use_item
+| extern_fn_item
+| EXTERN CRATE ident ';' { $$ = mk_node("ViewItemExternCrate", 1, $3); }
+| EXTERN CRATE ident '=' str ';' { $$ = mk_node("ViewItemExternCrate", 2, $3, $5); }
+| EXTERN CRATE str AS ident ';' { $$ = mk_node("ViewItemExternCrate", 2, $3, $5); }
+;
+
+extern_fn_item
+: EXTERN maybe_abi item_fn { $$ = mk_node("ViewItemExternFn", 2, $2, $3); }
+;
+
+use_item
+: USE view_path ';' { $$ = mk_node("ViewItemUse", 1, $2); }
+;
+
+view_path
+: path_no_types_allowed { $$ = mk_node("ViewPathSimple", 1, $1); }
+| path_no_types_allowed MOD_SEP '{' '}' { $$ = mk_node("ViewPathList", 2, $1, mk_atom("ViewPathListEmpty")); }
+| path_no_types_allowed MOD_SEP '{' idents_or_self '}' { $$ = mk_node("ViewPathList", 2, $1, $4); }
+| path_no_types_allowed MOD_SEP '{' idents_or_self ',' '}' { $$ = mk_node("ViewPathList", 2, $1, $4); }
+| path_no_types_allowed MOD_SEP '*' { $$ = mk_node("ViewPathGlob", 1, $1); }
+| '{' '}' { $$ = mk_atom("ViewPathListEmpty"); }
+| '{' idents_or_self '}' { $$ = mk_node("ViewPathList", 1, $2); }
+| '{' idents_or_self ',' '}' { $$ = mk_node("ViewPathList", 1, $2); }
+| path_no_types_allowed AS ident { $$ = mk_node("ViewPathSimple", 2, $1, $3); }
+;
+
+block_item
+: item_fn
+| item_unsafe_fn
+| item_mod
+| item_foreign_mod { $$ = mk_node("ItemForeignMod", 1, $1); }
+| item_struct
+| item_enum
+| item_trait
+| item_impl
+;
+
+maybe_ty_ascription
+: ':' ty { $$ = $2; }
+| %empty { $$ = mk_none(); }
+;
+
+maybe_init_expr
+: '=' expr { $$ = $2; }
+| %empty { $$ = mk_none(); }
+;
+
+// structs
+// Three forms: brace struct (fields), tuple struct (with trailing ';'),
+// and unit struct (just ';').
+item_struct
+: STRUCT ident generic_params maybe_where_clause struct_decl_args
+{
+  $$ = mk_node("ItemStruct", 4, $2, $3, $4, $5);
+}
+| STRUCT ident generic_params struct_tuple_args maybe_where_clause ';'
+{
+  $$ = mk_node("ItemStruct", 4, $2, $3, $4, $5);
+}
+| STRUCT ident generic_params maybe_where_clause ';'
+{
+  $$ = mk_node("ItemStruct", 3, $2, $3, $4);
+}
+;
+
+struct_decl_args
+: '{' struct_decl_fields '}' { $$ = $2; }
+| '{' struct_decl_fields ',' '}' { $$ = $2; }
+;
+
+struct_tuple_args
+: '(' struct_tuple_fields ')' { $$ = $2; }
+| '(' struct_tuple_fields ',' ')' { $$ = $2; }
+;
+
+struct_decl_fields
+: struct_decl_field { $$ = mk_node("StructFields", 1, $1); }
+| struct_decl_fields ',' struct_decl_field { $$ = ext_node($1, 1, $3); }
+| %empty { $$ = mk_none(); }
+;
+
+struct_decl_field
+: attrs_and_vis ident ':' ty_sum { $$ = mk_node("StructField", 3, $1, $2, $4); }
+;
+
+struct_tuple_fields
+: struct_tuple_field { $$ = mk_node("StructFields", 1, $1); }
+| struct_tuple_fields ',' struct_tuple_field { $$ = ext_node($1, 1, $3); }
+;
+
+struct_tuple_field
+: attrs_and_vis ty_sum { $$ = mk_node("StructField", 2, $1, $2); }
+;
+
+// enums
+// NOTE(review): the ItemEnum node is built with arity 0, discarding the
+// name, generics and variants — presumably only recognition matters
+// here; confirm against the AST-comparison consumer.
+item_enum
+: ENUM ident generic_params maybe_where_clause '{' enum_defs '}' { $$ = mk_node("ItemEnum", 0); }
+| ENUM ident generic_params maybe_where_clause '{' enum_defs ',' '}' { $$ = mk_node("ItemEnum", 0); }
+;
+
+enum_defs
+: enum_def { $$ = mk_node("EnumDefs", 1, $1); }
+| enum_defs ',' enum_def { $$ = ext_node($1, 1, $3); }
+| %empty { $$ = mk_none(); }
+;
+
+enum_def
+: attrs_and_vis ident enum_args { $$ = mk_node("EnumDef", 3, $1, $2, $3); }
+;
+
+// Variant payloads: struct-like, tuple-like, explicit discriminant, or
+// unit (empty).
+enum_args
+: '{' struct_decl_fields '}' { $$ = mk_node("EnumArgs", 1, $2); }
+| '{' struct_decl_fields ',' '}' { $$ = mk_node("EnumArgs", 1, $2); }
+| '(' maybe_ty_sums ')' { $$ = mk_node("EnumArgs", 1, $2); }
+| '=' expr { $$ = mk_node("EnumArgs", 1, $2); }
+| %empty { $$ = mk_none(); }
+;
+
+// `mod name;` or inline `mod name { ... }` (optionally with inner attrs).
+item_mod
+: MOD ident ';' { $$ = mk_node("ItemMod", 1, $2); }
+| MOD ident '{' maybe_mod_items '}' { $$ = mk_node("ItemMod", 2, $2, $4); }
+| MOD ident '{' inner_attrs maybe_mod_items '}' { $$ = mk_node("ItemMod", 3, $2, $4, $5); }
+;
+
+// `extern "abi" { ... }` foreign-function block.
+item_foreign_mod
+: EXTERN maybe_abi '{' maybe_foreign_items '}' { $$ = mk_node("ItemForeignMod", 1, $4); }
+| EXTERN maybe_abi '{' inner_attrs maybe_foreign_items '}' { $$ = mk_node("ItemForeignMod", 2, $4, $5); }
+;
+
+// Optional ABI string after `extern`.
+maybe_abi
+: str
+| %empty { $$ = mk_none(); }
+;
+
+maybe_foreign_items
+: foreign_items
+| %empty { $$ = mk_none(); }
+;
+
+foreign_items
+: foreign_item { $$ = mk_node("ForeignItems", 1, $1); }
+| foreign_items foreign_item { $$ = ext_node($1, 1, $2); }
+;
+
+// Declarations allowed inside an extern block: statics and fns
+// (optionally `unsafe`).
+foreign_item
+: attrs_and_vis STATIC item_foreign_static { $$ = mk_node("ForeignItem", 2, $1, $3); }
+| attrs_and_vis item_foreign_fn { $$ = mk_node("ForeignItem", 2, $1, $2); }
+| attrs_and_vis UNSAFE item_foreign_fn { $$ = mk_node("ForeignItem", 2, $1, $3); }
+;
+
+item_foreign_static
+: maybe_mut ident ':' ty ';' { $$ = mk_node("StaticItem", 3, $1, $2, $4); }
+;
+
+item_foreign_fn
+: FN ident generic_params fn_decl_allow_variadic maybe_where_clause ';' { $$ = mk_node("ForeignFn", 4, $2, $3, $4, $5); }
+;
+
+fn_decl_allow_variadic
+: fn_params_allow_variadic ret_ty { $$ = mk_node("FnDecl", 2, $1, $2); }
+;
+
+// Foreign fns may end the parameter list with `...` (C variadics).
+fn_params_allow_variadic
+: '(' ')' { $$ = mk_none(); }
+| '(' params ')' { $$ = $2; }
+| '(' params ',' ')' { $$ = $2; }
+| '(' params ',' DOTDOTDOT ')' { $$ = $2; }
+;
+
+visibility
+: PUB { $$ = mk_atom("Public"); }
+| %empty { $$ = mk_atom("Inherited"); }
+;
+
+// Comma-separated list of identifiers or `self` (used in use-lists).
+idents_or_self
+: ident_or_self { $$ = mk_node("IdentsOrSelf", 1, $1); }
+| idents_or_self ',' ident_or_self { $$ = ext_node($1, 1, $3); }
+;
+
+ident_or_self
+: ident
+| SELF { $$ = mk_atom(yytext); }
+;
+
+// `type NAME<..> where .. = TY;` alias item.
+item_type
+: TYPE ident generic_params maybe_where_clause '=' ty_sum ';' { $$ = mk_node("ItemTy", 4, $2, $3, $4, $6); }
+;
+
+// Optional `for ?Sized`-style clause on a trait; both orderings of
+// '?' and the ident are accepted.
+for_sized
+: FOR '?' ident { $$ = mk_node("ForSized", 1, $3); }
+| FOR ident '?' { $$ = mk_node("ForSized", 1, $2); }
+| %empty { $$ = mk_none(); }
+;
+
+item_trait
+: maybe_unsafe TRAIT ident generic_params for_sized maybe_ty_param_bounds maybe_where_clause '{' maybe_trait_items '}'
+{
+  $$ = mk_node("ItemTrait", 7, $1, $3, $4, $5, $6, $7, $9);
+}
+;
+
+maybe_trait_items
+: trait_items
+| %empty { $$ = mk_none(); }
+;
+
+trait_items
+: trait_item { $$ = mk_node("TraitItems", 1, $1); }
+| trait_items trait_item { $$ = ext_node($1, 1, $2); }
+;
+
+trait_item
+: trait_type
+| trait_method
+;
+
+trait_type
+: maybe_outer_attrs TYPE ty_param ';' { $$ = mk_node("TypeTraitItem", 2, $1, $3); }
+;
+
+maybe_unsafe
+: UNSAFE { $$ = mk_atom("Unsafe"); }
+| %empty { $$ = mk_none(); }
+;
+
+// Trait methods: a signature ending in ';' is Required, one with a
+// body is Provided.
+trait_method
+: type_method { $$ = mk_node("Required", 1, $1); }
+| method { $$ = mk_node("Provided", 1, $1); }
+;
+
+// Method signature without a body (attrs/vis, unsafety, [abi,] name,
+// generics, decl, where-clause).
+type_method
+: attrs_and_vis maybe_unsafe FN ident generic_params fn_decl_with_self_allow_anon_params maybe_where_clause ';'
+{
+  $$ = mk_node("TypeMethod", 6, $1, $2, $4, $5, $6, $7);
+}
+| attrs_and_vis maybe_unsafe EXTERN maybe_abi FN ident generic_params fn_decl_with_self_allow_anon_params maybe_where_clause ';'
+{
+  $$ = mk_node("TypeMethod", 7, $1, $2, $4, $6, $7, $8, $9);
+}
+;
+
+// Method with a body; trait methods may use anonymous (type-only) params.
+method
+: attrs_and_vis maybe_unsafe FN ident generic_params fn_decl_with_self_allow_anon_params maybe_where_clause inner_attrs_and_block
+{
+  $$ = mk_node("Method", 7, $1, $2, $4, $5, $6, $7, $8);
+}
+| attrs_and_vis maybe_unsafe EXTERN maybe_abi FN ident generic_params fn_decl_with_self_allow_anon_params maybe_where_clause inner_attrs_and_block
+{
+  $$ = mk_node("Method", 8, $1, $2, $4, $6, $7, $8, $9, $10);
+}
+;
+
+// Like `method`, but impl methods require named parameters
+// (fn_decl_with_self rather than the anon-params variant).
+impl_method
+: attrs_and_vis maybe_unsafe FN ident generic_params fn_decl_with_self maybe_where_clause inner_attrs_and_block
+{
+  $$ = mk_node("Method", 7, $1, $2, $4, $5, $6, $7, $8);
+}
+| attrs_and_vis maybe_unsafe EXTERN maybe_abi FN ident generic_params fn_decl_with_self maybe_where_clause inner_attrs_and_block
+{
+  $$ = mk_node("Method", 8, $1, $2, $4, $6, $7, $8, $9, $10);
+}
+;
+
+// There are two forms of impl:
+//
+// impl (<...>)? TY { ... }
+// impl (<...>)? TRAIT for TY { ... }
+//
+// Unfortunately since TY can begin with '<' itself -- as part of a
+// TyQualifiedPath type -- there's an s/r conflict when we see '<' after IMPL:
+// should we reduce one of the early rules of TY (such as maybe_once)
+// or shall we continue shifting into the generic_params list for the
+// impl?
+//
+// The production parser disambiguates a different case here by
+// permitting / requiring the user to provide parens around types when
+// they are ambiguous with traits. We do the same here, regrettably,
+// by splitting ty into ty and ty_prim.
+item_impl
+: maybe_unsafe IMPL generic_params ty_prim_sum maybe_where_clause '{' maybe_inner_attrs maybe_impl_items '}'
+{
+  $$ = mk_node("ItemImpl", 6, $1, $3, $4, $5, $7, $8);
+}
+| maybe_unsafe IMPL generic_params '(' ty ')' maybe_where_clause '{' maybe_inner_attrs maybe_impl_items '}'
+{
+  // Fixed: was `5, $6` — a literal int passed into variadic mk_node
+  // (which expects node pointers) and the ')' token in place of the
+  // where clause. $5 is the parenthesized self-type, $7 the where
+  // clause, mirroring the first alternative's children.
+  $$ = mk_node("ItemImpl", 6, $1, $3, $5, $7, $9, $10);
+}
+| maybe_unsafe IMPL generic_params trait_ref FOR ty maybe_where_clause '{' maybe_inner_attrs maybe_impl_items '}'
+{
+  // NOTE(review): this arm drops the unsafety marker ($1), unlike the
+  // others — confirm whether the AST consumer relies on 6 children here.
+  $$ = mk_node("ItemImpl", 6, $3, $4, $6, $7, $9, $10);
+}
+| maybe_unsafe IMPL generic_params '!' trait_ref FOR ty maybe_where_clause '{' maybe_inner_attrs maybe_impl_items '}'
+{
+  $$ = mk_node("ItemImplNeg", 7, $1, $3, $5, $7, $8, $10, $11);
+}
+;
+
+maybe_impl_items
+: impl_items
+| %empty { $$ = mk_none(); }
+;
+
+// NOTE(review): unlike the other list rules (which extend the list
+// node), the recursive arm extends the first *item* node with the
+// rest of the list — confirm this shape is what the consumer expects.
+impl_items
+: impl_item { $$ = mk_node("ImplItems", 1, $1); }
+| impl_item impl_items { $$ = ext_node($1, 1, $2); }
+;
+
+impl_item
+: impl_method
+| item_macro
+| trait_type
+;
+
+// Free `fn` item with a body.
+item_fn
+: FN ident generic_params fn_decl maybe_where_clause inner_attrs_and_block
+{
+  $$ = mk_node("ItemFn", 5, $2, $3, $4, $5, $6)
+;
+}
+;
+
+item_unsafe_fn
+: UNSAFE FN ident generic_params fn_decl maybe_where_clause inner_attrs_and_block
+{
+  $$ = mk_node("ItemUnsafeFn", 5, $3, $4, $5, $6, $7);
+}
+| UNSAFE EXTERN maybe_abi FN ident generic_params fn_decl maybe_where_clause inner_attrs_and_block
+{
+  $$ = mk_node("ItemUnsafeFn", 6, $3, $5, $6, $7, $8, $9);
+}
+;
+
+fn_decl
+: fn_params ret_ty { $$ = mk_node("FnDecl", 2, $1, $2); }
+;
+
+fn_decl_with_self
+: fn_params_with_self ret_ty { $$ = mk_node("FnDecl", 2, $1, $2); }
+;
+
+fn_decl_with_self_allow_anon_params
+: fn_anon_params_with_self ret_ty { $$ = mk_node("FnDecl", 2, $1, $2); }
+;
+
+fn_params
+: '(' maybe_params ')' { $$ = $2; }
+;
+
+fn_anon_params
+: '(' anon_param anon_params_allow_variadic_tail ')' { $$ = ext_node($2, 1, $3); }
+| '(' ')' { $$ = mk_none(); }
+;
+
+// Parameter lists that may begin with a self receiver: by-value
+// (`self`), by-reference (`&self`, optionally with a lifetime), or no
+// receiver at all (SelfStatic).
+fn_params_with_self
+: '(' maybe_mut SELF maybe_ty_ascription maybe_comma_params ')' { $$ = mk_node("SelfValue", 3, $2, $4, $5); }
+| '(' '&' maybe_mut SELF maybe_ty_ascription maybe_comma_params ')' { $$ = mk_node("SelfRegion", 3, $3, $5, $6); }
+| '(' '&' lifetime maybe_mut SELF maybe_ty_ascription maybe_comma_params ')' { $$ = mk_node("SelfRegion", 4, $3, $4, $6, $7); }
+| '(' maybe_params ')' { $$ = mk_node("SelfStatic", 1, $2); }
+;
+
+// Same shapes as fn_params_with_self, but the non-receiver params may
+// be anonymous (type-only).
+fn_anon_params_with_self
+: '(' maybe_mut SELF maybe_ty_ascription maybe_comma_anon_params ')' { $$ = mk_node("SelfValue", 3, $2, $4, $5); }
+| '(' '&' maybe_mut SELF maybe_ty_ascription maybe_comma_anon_params ')' { $$ = mk_node("SelfRegion", 3, $3, $5, $6); }
+| '(' '&' lifetime maybe_mut SELF maybe_ty_ascription maybe_comma_anon_params ')' { $$ = mk_node("SelfRegion", 4, $3, $4, $6, $7); }
+| '(' maybe_anon_params ')' { $$ = mk_node("SelfStatic", 1, $2); }
+;
+
+// Optional parameter list with optional trailing comma.
+maybe_params
+: params
+| params ','
+| %empty { $$ = mk_none(); }
+;
+
+params
+: param { $$ = mk_node("Args", 1, $1); }
+| params ',' param { $$ = ext_node($1, 1, $3); }
+;
+
+// A named parameter: pattern with a mandatory type ascription.
+param
+: pat ':' ty { $$ = mk_node("Arg", 2, $1, $3); }
+;
+
+// Closure parameters: type ascription is optional (inferred).
+inferrable_params
+: inferrable_param { $$ = mk_node("InferrableParams", 1, $1); }
+| inferrable_params ',' inferrable_param { $$ = ext_node($1, 1, $3); }
+;
+
+inferrable_param
+: pat maybe_ty_ascription { $$ = mk_node("InferrableParam", 2, $1, $2); }
+;
+
+// Recognized but contributes nothing to the AST (no actions).
+maybe_unboxed_closure_kind
+: %empty
+| ':'
+| '&' maybe_mut ':'
+;
+
+// Params following a self receiver; a lone ',' is allowed and empty.
+maybe_comma_params
+: ',' { $$ = mk_none(); }
+| ',' params { $$ = $2; }
+| ',' params ',' { $$ = $2; }
+| %empty { $$ = mk_none(); }
+;
+
+// Anon-param analogue of maybe_comma_params.
+maybe_comma_anon_params
+: ',' { $$ = mk_none(); }
+| ',' anon_params { $$ = $2; }
+| ',' anon_params ',' { $$ = $2; }
+| %empty { $$ = mk_none(); }
+;
+
+maybe_anon_params
+: anon_params
+| anon_params ','
+| %empty { $$ = mk_none(); }
+;
+
+anon_params
+: anon_param { $$ = mk_node("Args", 1, $1); }
+| anon_params ',' anon_param { $$ = ext_node($1, 1, $3); }
+;
+
+// anon means it's allowed to be anonymous (type-only), but it can
+// still have a name
+anon_param
+: named_arg ':' ty { $$ = mk_node("Arg", 2, $1, $3); }
+| ty
+;
+
+// Optional `, ...` variadic tail after the first anon param.
+anon_params_allow_variadic_tail
+: ',' DOTDOTDOT { $$ = mk_none(); }
+| ',' anon_param anon_params_allow_variadic_tail { $$ = mk_node("Args", 2, $2, $3); }
+| %empty { $$ = mk_none(); }
+;
+
+// Name position of an anon param: ident, '_', by-ref / by-ref-ref /
+// mut forms; '_' in any form yields a PatWild atom.
+named_arg
+: ident
+| UNDERSCORE { $$ = mk_atom("PatWild"); }
+| '&' ident { $$ = $2; }
+| '&' UNDERSCORE { $$ = mk_atom("PatWild"); }
+| ANDAND ident { $$ = $2; }
+| ANDAND UNDERSCORE { $$ = mk_atom("PatWild"); }
+| MUT ident { $$ = $2; }
+;
+
+// Optional `-> TY` return type; `-> !` and absence both map to none.
+ret_ty
+: RARROW '!' { $$ = mk_none(); }
+| RARROW ty { $$ = mk_node("ret-ty", 1, $2); }
+| %prec IDENT %empty { $$ = mk_none(); }
+;
+
+// Generic parameter list `<lifetimes, ty_params>`. The SHR variants
+// handle '>>' arriving as one token: push_back('>') re-queues the
+// second '>' for the enclosing context.
+generic_params
+: '<' lifetimes '>' { $$ = mk_node("Generics", 2, $2, mk_none()); }
+| '<' lifetimes ',' '>' { $$ = mk_node("Generics", 2, $2, mk_none()); }
+| '<' lifetimes SHR { push_back('>'); $$ = mk_node("Generics", 2, $2, mk_none()); }
+| '<' lifetimes ',' SHR { push_back('>'); $$ = mk_node("Generics", 2, $2, mk_none()); }
+| '<' lifetimes ',' ty_params '>' { $$ = mk_node("Generics", 2, $2, $4); }
+| '<' lifetimes ',' ty_params ',' '>' { $$ = mk_node("Generics", 2, $2, $4); }
+| '<' lifetimes ',' ty_params SHR { push_back('>'); $$ = mk_node("Generics", 2, $2, $4); }
+| '<' lifetimes ',' ty_params ',' SHR { push_back('>'); $$ = mk_node("Generics", 2, $2, $4); }
+| '<' ty_params '>' { $$ = mk_node("Generics", 2, mk_none(), $2); }
+| '<' ty_params ',' '>' { $$ = mk_node("Generics", 2, mk_none(), $2); }
+| '<' ty_params SHR { push_back('>'); $$ = mk_node("Generics", 2, mk_none(), $2); }
+| '<' ty_params ',' SHR { push_back('>'); $$ = mk_node("Generics", 2, mk_none(), $2); }
+| %empty { $$ = mk_none(); }
+;
+
+maybe_where_clause
+: %empty { $$ = mk_none(); }
+| where_clause
+;
+
+// `where PRED, PRED[,]`.
+where_clause
+: WHERE where_predicates { $$ = mk_node("WhereClause", 1, $2); }
+| WHERE where_predicates ',' { $$ = mk_node("WhereClause", 1, $2); }
+;
+
+where_predicates
+: where_predicate { $$ = mk_node("WherePredicates", 1, $1); }
+| where_predicates ',' where_predicate { $$ = ext_node($1, 1, $3); }
+;
+
+// Either a lifetime bound (`'a: 'b + ..`) or a type bound (`T: Tr + ..`).
+where_predicate
+: lifetime ':' bounds { $$ = mk_node("WherePredicate", 2, $1, $3); }
+| ty ':' ty_param_bounds { $$ = mk_node("WherePredicate", 2, $1, $3); }
+;
+
+ty_params
+: ty_param { $$ = mk_node("TyParams", 1, $1); }
+| ty_params ',' ty_param { $$ = ext_node($1, 1, $3); }
+;
+
+// A path with no type parameters; e.g. `foo::bar::Baz`
+//
+// These show up in 'use' view-items, because these are processed
+// without respect to types.
+path_no_types_allowed
+: ident { $$ = mk_node("ViewPath", 1, $1); }
+| MOD_SEP ident { $$ = mk_node("ViewPath", 1, $2); }
+| SELF { $$ = mk_node("ViewPath", 1, mk_atom("Self")); }
+| MOD_SEP SELF { $$ = mk_node("ViewPath", 1, mk_atom("Self")); }
+| path_no_types_allowed MOD_SEP ident { $$ = ext_node($1, 1, $3); }
+;
+
+// A path with a lifetime and type parameters, with no double colons
+// before the type parameters; e.g. `foo::bar<'a>::Baz<T>`
+//
+// These show up in "trait references", the components of
+// type-parameter bounds lists, as well as in the prefix of the
+// path_generic_args_and_bounds rule, which is the full form of a
+// named typed expression.
+//
+// They do not have (nor need) an extra '::' before '<' because
+// unlike in expr context, there are no "less-than" type exprs to
+// be ambiguous with.
+// Each alternative carries %prec IDENT to resolve shift/reduce
+// conflicts with adjacent tokens in favor of extending the path.
+path_generic_args_without_colons
+: %prec IDENT
+  ident { $$ = mk_node("components", 1, $1); }
+| %prec IDENT
+  ident generic_args { $$ = mk_node("components", 2, $1, $2); }
+| %prec IDENT
+  ident '(' maybe_ty_sums ')' ret_ty { $$ = mk_node("components", 2, $1, $3); }
+| %prec IDENT
+  path_generic_args_without_colons MOD_SEP ident { $$ = ext_node($1, 1, $3); }
+| %prec IDENT
+  path_generic_args_without_colons MOD_SEP ident generic_args { $$ = ext_node($1, 2, $3, $4); }
+| %prec IDENT
+  path_generic_args_without_colons MOD_SEP ident '(' maybe_ty_sums ')' ret_ty { $$ = ext_node($1, 2, $3, $5); }
+;
+
+// `<...>` argument list; SHR/GE/SHREQ variants re-queue the consumed
+// extra characters ('>', '=') via push_back for the enclosing context.
+generic_args
+: '<' generic_values '>' { $$ = $2; }
+| '<' generic_values SHR { push_back('>'); $$ = $2; }
+| '<' generic_values GE { push_back('='); $$ = $2; }
+| '<' generic_values SHREQ { push_back('>'); push_back('='); $$ = $2; }
+// If generic_args starts with "<<", the first arg must be a
+// TyQualifiedPath because that's the only type that can start with a
+// '<'. This rule parses that as the first ty_sum and then continues
+// with the rest of generic_values.
+| SHL ty_qualified_path_and_generic_values '>' { $$ = $2; }
+| SHL ty_qualified_path_and_generic_values SHR { push_back('>'); $$ = $2; }
+| SHL ty_qualified_path_and_generic_values GE { push_back('='); $$ = $2; }
+| SHL ty_qualified_path_and_generic_values SHREQ { push_back('>'); push_back('='); $$ = $2; }
+;
+
+generic_values
+: maybe_lifetimes maybe_ty_sums_and_or_bindings { $$ = mk_node("GenericValues", 2, $1, $2); }
+;
+
+// Type args and/or `ident = ty` bindings, with optional trailing commas.
+maybe_ty_sums_and_or_bindings
+: ty_sums
+| ty_sums ','
+| ty_sums ',' bindings { $$ = mk_node("TySumsAndBindings", 2, $1, $3); }
+| bindings
+| bindings ','
+| %empty { $$ = mk_none(); }
+;
+
+maybe_bindings
+: ',' bindings { $$ = $2; }
+| %empty { $$ = mk_none(); }
+;
+
+////////////////////////////////////////////////////////////////////////
+// Part 2: Patterns
+////////////////////////////////////////////////////////////////////////
+
+// A single pattern. `&&` arriving as one ANDAND token is modeled as
+// two nested PatRegion nodes.
+pat
+: UNDERSCORE { $$ = mk_atom("PatWild"); }
+| '&' pat { $$ = mk_node("PatRegion", 1, $2); }
+| '&' MUT pat { $$ = mk_node("PatRegion", 1, $3); }
+| ANDAND pat { $$ = mk_node("PatRegion", 1, mk_node("PatRegion", 1, $2)); }
+| '(' ')' { $$ = mk_atom("PatUnit"); }
+| '(' pat_tup ')' { $$ = mk_node("PatTup", 1, $2); }
+| '(' pat_tup ',' ')' { $$ = mk_node("PatTup", 1, $2); }
+| '[' pat_vec ']' { $$ = mk_node("PatVec", 1, $2); }
+| lit_or_path
+| lit_or_path DOTDOTDOT lit_or_path { $$ = mk_node("PatRange", 2, $1, $3); }
+| path_expr '{' pat_struct '}' { $$ = mk_node("PatStruct", 2, $1, $3); }
+| path_expr '(' DOTDOT ')' { $$ = mk_node("PatEnum", 1, $1); }
+| path_expr '(' pat_tup ')' { $$ = mk_node("PatEnum", 2, $1, $3); }
+| path_expr '!' maybe_ident delimited_token_trees { $$ = mk_node("PatMac", 3, $1, $3, $4); }
+| binding_mode ident { $$ = mk_node("PatIdent", 2, $1, $2); }
+| ident '@' pat { $$ = mk_node("PatIdent", 3, mk_node("BindByValue", 1, mk_atom("MutImmutable")), $1, $3); }
+| binding_mode ident '@' pat { $$ = mk_node("PatIdent", 3, $1, $2, $4); }
+| BOX pat { $$ = mk_node("PatUniq", 1, $2); }
+;
+
+// '|'-separated pattern alternatives (match arms).
+pats_or
+: pat { $$ = mk_node("Pats", 1, $1); }
+| pats_or '|' pat { $$ = ext_node($1, 1, $3); }
+;
+
+// `ref`, `ref mut`, or `mut` binding qualifiers.
+binding_mode
+: REF { $$ = mk_node("BindByRef", 1, mk_atom("MutImmutable")); }
+| REF MUT { $$ = mk_node("BindByRef", 1, mk_atom("MutMutable")); }
+| MUT { $$ = mk_node("BindByValue", 1, mk_atom("MutMutable")); }
+;
+
+// Literal or path usable on either side of a `...` range pattern.
+lit_or_path
+: path_expr { $$ = mk_node("PatLit", 1, $1); }
+| lit { $$ = mk_node("PatLit", 1, $1); }
+| '-' lit { $$ = mk_node("PatLit", 1, $2); }
+;
+
+// One field in a struct pattern: shorthand (optionally box/ref/mut
+// qualified) or explicit `ident: pat`.
+pat_field
+: ident { $$ = mk_node("PatField", 1, $1); }
+| binding_mode ident { $$ = mk_node("PatField", 2, $1, $2); }
+| BOX ident { $$ = mk_node("PatField", 2, mk_atom("box"), $2); }
+| BOX binding_mode ident { $$ = mk_node("PatField", 3, mk_atom("box"), $2, $3); }
+| ident ':' pat { $$ = mk_node("PatField", 2, $1, $3); }
+| binding_mode ident ':' pat { $$ = mk_node("PatField", 3, $1, $2, $4); }
+;
+
+pat_fields
+: pat_field { $$ = mk_node("PatFields", 1, $1); }
+| pat_fields ',' pat_field { $$ = ext_node($1, 1, $3); }
+;
+
+// Struct pattern interior; the trailing atom records whether `..`
+// ("etc") was present.
+pat_struct
+: pat_fields { $$ = mk_node("PatStruct", 2, $1, mk_atom("false")); }
+| pat_fields ',' { $$ = mk_node("PatStruct", 2, $1, mk_atom("false")); }
+| pat_fields ',' DOTDOT { $$ = mk_node("PatStruct", 2, $1, mk_atom("true")); }
+| DOTDOT { $$ = mk_node("PatStruct", 1, mk_atom("true")); }
+;
+
+pat_tup
+: pat { $$ = mk_node("pat_tup", 1, $1); }
+| pat_tup ',' pat { $$ = ext_node($1, 1, $3); }
+;
+
+// Slice pattern interior: elements before and after an optional `..`,
+// each side possibly empty, with optional trailing commas.
+pat_vec
+: pat_vec_elts { $$ = mk_node("PatVec", 2, $1, mk_none()); }
+| pat_vec_elts ',' { $$ = mk_node("PatVec", 2, $1, mk_none()); }
+| pat_vec_elts DOTDOT { $$ = mk_node("PatVec", 2, $1, mk_none()); }
+| pat_vec_elts ',' DOTDOT { $$ = mk_node("PatVec", 2, $1, mk_none()); }
+| pat_vec_elts DOTDOT ',' pat_vec_elts { $$ = mk_node("PatVec", 2, $1, $4); }
+| pat_vec_elts DOTDOT ',' pat_vec_elts ',' { $$ = mk_node("PatVec", 2, $1, $4); }
+| pat_vec_elts ',' DOTDOT ',' pat_vec_elts { $$ = mk_node("PatVec", 2, $1, $5); }
+| pat_vec_elts ',' DOTDOT ',' pat_vec_elts ',' { $$ = mk_node("PatVec", 2, $1, $5); }
+| DOTDOT ',' pat_vec_elts { $$ = mk_node("PatVec", 2, mk_none(), $3); }
+| DOTDOT ',' pat_vec_elts ',' { $$ = mk_node("PatVec", 2, mk_none(), $3); }
+| DOTDOT { $$ = mk_node("PatVec", 2, mk_none(), mk_none()); }
+| %empty { $$ = mk_node("PatVec", 2, mk_none(), mk_none()); }
+;
+
+pat_vec_elts
+: pat { $$ = mk_node("PatVecElts", 1, $1); }
+| pat_vec_elts ',' pat { $$ = ext_node($1, 1, $3); }
+;
+
+////////////////////////////////////////////////////////////////////////
+// Part 3: Types
+////////////////////////////////////////////////////////////////////////
+
+// A type: primitive form, closure type, qualified path (`<T as Tr>::X`,
+// including the '<<' lexing case via SHL), tuple, or unit.
+ty
+: ty_prim
+| ty_closure
+| '<' ty_sum AS trait_ref '>' MOD_SEP ident { $$ = mk_node("TyQualifiedPath", 3, $2, $4, $7); }
+| SHL ty_sum AS trait_ref '>' MOD_SEP ident AS trait_ref '>' MOD_SEP ident { $$ = mk_node("TyQualifiedPath", 3, mk_node("TyQualifiedPath", 3, $2, $4, $7), $9, $12); }
+| '(' ty_sums ')' { $$ = mk_node("TyTup", 1, $2); }
+| '(' ty_sums ',' ')' { $$ = mk_node("TyTup", 1, $2); }
+| '(' ')' { $$ = mk_atom("TyNil"); }
+;
+
+// "Primitive" types: everything unambiguous with a trait reference
+// (see the item_impl comment for why ty is split this way).
+ty_prim
+: %prec IDENT path_generic_args_without_colons { $$ = mk_node("TyPath", 2, mk_node("global", 1, mk_atom("false")), $1); }
+| %prec IDENT MOD_SEP path_generic_args_without_colons { $$ = mk_node("TyPath", 2, mk_node("global", 1, mk_atom("true")), $2); }
+| %prec IDENT SELF MOD_SEP path_generic_args_without_colons { $$ = mk_node("TyPath", 2, mk_node("self", 1, mk_atom("true")), $3); }
+| BOX ty { $$ = mk_node("TyBox", 1, $2); }
+| '*' maybe_mut_or_const ty { $$ = mk_node("TyPtr", 2, $2, $3); }
+| '&' ty { $$ = mk_node("TyRptr", 2, mk_atom("MutImmutable"), $2); }
+| '&' MUT ty { $$ = mk_node("TyRptr", 2, mk_atom("MutMutable"), $3); }
+| ANDAND ty { $$ = mk_node("TyRptr", 1, mk_node("TyRptr", 2, mk_atom("MutImmutable"), $2)); }
+| ANDAND MUT ty { $$ = mk_node("TyRptr", 1, mk_node("TyRptr", 2, mk_atom("MutMutable"), $3)); }
+| '&' lifetime maybe_mut ty { $$ = mk_node("TyRptr", 3, $2, $3, $4); }
+| ANDAND lifetime maybe_mut ty { $$ = mk_node("TyRptr", 1, mk_node("TyRptr", 3, $2, $3, $4)); }
+| '[' ty ']' { $$ = mk_node("TyVec", 1, $2); }
+| '[' ty ',' DOTDOT expr ']' { $$ = mk_node("TyFixedLengthVec", 2, $2, $5); }
+| '[' ty ';' expr ']' { $$ = mk_node("TyFixedLengthVec", 2, $2, $4); }
+| TYPEOF '(' expr ')' { $$ = mk_node("TyTypeof", 1, $3); }
+| UNDERSCORE { $$ = mk_atom("TyInfer"); }
+| ty_bare_fn
+| ty_proc
+| for_in_type
+;
+
+// `fn(..)` types with optional unsafety / extern ABI.
+ty_bare_fn
+: FN ty_fn_decl { $$ = $2; }
+| UNSAFE FN ty_fn_decl { $$ = $3; }
+| EXTERN maybe_abi FN ty_fn_decl { $$ = $4; }
+| UNSAFE EXTERN maybe_abi FN ty_fn_decl { $$ = $5; }
+;
+
+ty_fn_decl
+: generic_params fn_anon_params ret_ty { $$ = mk_node("TyFnDecl", 3, $1, $2, $3); }
+;
+
+// Closure types: `|args| -> ret` or the empty-args `||` form (OROR).
+ty_closure
+: UNSAFE '|' anon_params '|' maybe_bounds ret_ty { $$ = mk_node("TyClosure", 3, $3, $5, $6); }
+| '|' anon_params '|' maybe_bounds ret_ty { $$ = mk_node("TyClosure", 3, $2, $4, $5); }
+| UNSAFE OROR maybe_bounds ret_ty { $$ = mk_node("TyClosure", 2, $3, $4); }
+| OROR maybe_bounds ret_ty { $$ = mk_node("TyClosure", 2, $2, $3); }
+;
+
+ty_proc
+: PROC generic_params fn_params maybe_bounds ret_ty { $$ = mk_node("TyProc", 4, $2, $3, $4, $5); }
+;
+
+// Higher-ranked types: `for<'a> ...` followed by a fn/proc/trait/closure.
+for_in_type
+: FOR '<' maybe_lifetimes '>' for_in_type_suffix { $$ = mk_node("ForInType", 2, $3, $5); }
+;
+
+for_in_type_suffix
+: ty_proc
+| ty_bare_fn
+| trait_ref
+| ty_closure
+;
+
+maybe_mut
+: MUT { $$ = mk_atom("MutMutable"); }
+| %prec MUT %empty { $$ = mk_atom("MutImmutable"); }
+;
+
+// Raw-pointer qualifier; `const` maps to the immutable atom.
+maybe_mut_or_const
+: MUT { $$ = mk_atom("MutMutable"); }
+| CONST { $$ = mk_atom("MutImmutable"); }
+| %empty { $$ = mk_atom("MutImmutable"); }
+;
+
+// Continuation of the "<<" generic_args case: the qualified path is
+// wrapped as the first TySum of the GenericValues node.
+ty_qualified_path_and_generic_values
+: ty_qualified_path maybe_bindings
+{
+  $$ = mk_node("GenericValues", 3, mk_none(), mk_node("TySums", 1, mk_node("TySum", 1, $1)), $2);
+}
+| ty_qualified_path ',' ty_sums maybe_bindings
+{
+  $$ = mk_node("GenericValues", 3, mk_none(), ext_node(mk_node("TySums", 1, $1), 1, $3), $4); }
+;
+
+// `TY as TRAIT>::ident` (the leading '<'/SHL was consumed by the caller).
+// NOTE(review): the '+ ty_param_bounds' arm discards the bounds ($8).
+ty_qualified_path
+: ty_sum AS trait_ref '>' MOD_SEP ident { $$ = mk_node("TyQualifiedPath", 3, $1, $3, $6); }
+| ty_sum AS trait_ref '>' MOD_SEP ident '+' ty_param_bounds { $$ = mk_node("TyQualifiedPath", 3, $1, $3, $6); }
+;
+
+maybe_ty_sums
+: ty_sums
+| ty_sums ','
+| %empty { $$ = mk_none(); }
+;
+
+ty_sums
+: ty_sum { $$ = mk_node("TySums", 1, $1); }
+| ty_sums ',' ty_sum { $$ = ext_node($1, 1, $3); }
+;
+
+// A type plus optional `+ bounds` (e.g. `Box<Trait + Send>`).
+ty_sum
+: ty { $$ = mk_node("TySum", 1, $1); }
+| ty '+' ty_param_bounds { $$ = mk_node("TySum", 2, $1, $3); }
+;
+
+// Like ty_sum but restricted to ty_prim (see the item_impl comment).
+ty_prim_sum
+: ty_prim { $$ = mk_node("TySum", 1, $1); }
+| ty_prim '+' ty_param_bounds { $$ = mk_node("TySum", 2, $1, $3); }
+;
+
+maybe_ty_param_bounds
+: ':' ty_param_bounds { $$ = $2; }
+| %empty { $$ = mk_none(); }
+;
+
+ty_param_bounds
+: boundseq
+| %empty { $$ = mk_none(); }
+;
+
+boundseq
+: polybound
+| boundseq '+' polybound { $$ = ext_node($1, 1, $3); }
+;
+
+// A bound, optionally higher-ranked (`for<'a> Bound`) or relaxed
+// (`?Bound` — the '?' marker is dropped from the AST).
+polybound
+: FOR '<' maybe_lifetimes '>' bound { $$ = mk_node("PolyBound", 2, $3, $5); }
+| bound
+| '?' bound { $$ = $2; }
+;
+
+bindings
+: binding { $$ = mk_node("Bindings", 1, $1); }
+| bindings ',' binding { $$ = ext_node($1, 1, $3); }
+;
+
+// Associated-type binding `ident = TY` inside generic args.
+// Fixed: the action previously called mk_node without assigning to $$,
+// leaving the rule's semantic value undefined.
+binding
+: ident '=' ty { $$ = mk_node("Binding", 2, $1, $3); }
+;
+
+// Type parameter declaration, optionally with bounds and a default.
+ty_param
+: ident maybe_ty_param_bounds maybe_ty_default { $$ = mk_node("TyParam", 3, $1, $2, $3); }
+| ident '?' ident maybe_ty_param_bounds maybe_ty_default { $$ = mk_node("TyParam", 4, $1, $3, $4, $5); }
+;
+
+maybe_bounds
+: %prec SHIFTPLUS
+  ':' bounds { $$ = $2; }
+| %prec SHIFTPLUS %empty { $$ = mk_none(); }
+;
+
+bounds
+: bound { $$ = mk_node("bounds", 1, $1); }
+| bounds '+' bound { $$ = ext_node($1, 1, $3); }
+;
+
+bound
+: lifetime
+| trait_ref
+;
+
+maybe_ltbounds
+: %prec SHIFTPLUS
+  ':' ltbounds { $$ = $2; }
+| %empty { $$ = mk_none(); }
+;
+
+ltbounds
+: lifetime { $$ = mk_node("ltbounds", 1, $1); }
+| ltbounds '+' lifetime { $$ = ext_node($1, 1, $3); }
+;
+
+maybe_ty_default
+: '=' ty_sum { $$ = mk_node("TyDefault", 1, $2); }
+| %empty { $$ = mk_none(); }
+;
+
+maybe_lifetimes
+: lifetimes
+| lifetimes ','
+| %empty { $$ = mk_none(); }
+;
+
+lifetimes
+: lifetime_and_bounds { $$ = mk_node("Lifetimes", 1, $1); }
+| lifetimes ',' lifetime_and_bounds { $$ = ext_node($1, 1, $3); }
+;
+
+// Lifetime in declaration position (may carry bounds); 'static is a
+// distinct token and atom.
+lifetime_and_bounds
+: LIFETIME maybe_ltbounds { $$ = mk_node("lifetime", 2, mk_atom(yytext), $2); }
+| STATIC_LIFETIME { $$ = mk_atom("static_lifetime"); }
+;
+
+lifetime
+: LIFETIME { $$ = mk_node("lifetime", 1, mk_atom(yytext)); }
+| STATIC_LIFETIME { $$ = mk_atom("static_lifetime"); }
+;
+
+trait_ref
+: %prec IDENT path_generic_args_without_colons
+| %prec IDENT MOD_SEP path_generic_args_without_colons { $$ = $2; }
+;
+
+////////////////////////////////////////////////////////////////////////
+// Part 4: Blocks, statements, and expressions
+////////////////////////////////////////////////////////////////////////
+
+// Function-body block: may start with inner attributes (`#![...]`).
+inner_attrs_and_block
+: '{' maybe_inner_attrs maybe_stmts '}' { $$ = mk_node("ExprBlock", 2, $2, $3); }
+;
+
+block
+: '{' maybe_stmts '}' { $$ = mk_node("ExprBlock", 1, $2); }
+;
+
+// Statements, optionally followed by a trailing (tail) expression.
+maybe_stmts
+: stmts
+| stmts nonblock_expr { $$ = ext_node($1, 1, $2); }
+| nonblock_expr
+| %empty { $$ = mk_none(); }
+;
+;
+
+// There are two sub-grammars within a "stmts: exprs" derivation
+// depending on whether each stmt-expr is a block-expr form; this is to
+// handle the "semicolon rule" for stmt sequencing that permits
+// writing
+//
+// if foo { bar } 10
+//
+// as a sequence of two stmts (one if-expr stmt, one lit-10-expr
+// stmt). Unfortunately by permitting juxtaposition of exprs in
+// sequence like that, the non-block expr grammar has to have a
+// second limited sub-grammar that excludes the prefix exprs that
+// are ambiguous with binops. That is to say:
+//
+// {10} - 1
+//
+// should parse as (progn (progn 10) (- 1)) not (- (progn 10) 1), that
+// is to say, two statements rather than one, at least according to
+// the mainline rust parser.
+//
+// So we wind up with a 3-way split in exprs that occur in stmt lists:
+// block, nonblock-prefix, and nonblock-nonprefix.
+//
+// In non-stmts contexts, expr can relax this trichotomy.
+//
+// There are also two other expr subtypes: first, nonparen_expr
+// disallows exprs surrounded by parens (including tuple expressions),
+// this is necessary for BOX (place) expressions, so a parens expr
+// following the BOX is always parsed as the place. There is also
+// expr_norange used in index_expr, which disallows '..' in
+// expressions as that has special meaning inside of brackets.
+
+stmts
+: stmt { $$ = mk_node("stmts", 1, $1); }
+| stmts stmt { $$ = ext_node($1, 1, $2); }
+;
+
+// A statement: let, item (optionally with attrs/`pub`), block-form
+// expression, non-block expression terminated by ';', or bare ';'.
+stmt
+: let
+| stmt_item
+| PUB stmt_item { $$ = $2; }
+| outer_attrs stmt_item { $$ = $2; }
+| outer_attrs PUB stmt_item { $$ = $3; }
+| full_block_expr
+| block
+| nonblock_expr ';'
+| ';' { $$ = mk_none(); }
+;
+
+maybe_exprs
+: exprs
+| exprs ','
+| %empty { $$ = mk_none(); }
+;
+
+maybe_expr
+: expr
+| %empty { $$ = mk_none(); }
+;
+
+exprs
+: expr { $$ = mk_node("exprs", 1, $1); }
+| exprs ',' expr { $$ = ext_node($1, 1, $3); }
+;
+
+// Path in expression position, optionally absolute or self-relative.
+path_expr
+: path_generic_args_with_colons
+| MOD_SEP path_generic_args_with_colons { $$ = $2; }
+| SELF MOD_SEP path_generic_args_with_colons { $$ = mk_node("SelfPath", 1, $3); }
+;
+
+// A path with a lifetime and type parameters with double colons before
+// the type parameters; e.g. `foo::bar::<'a>::Baz::<T>`
+//
+// These show up in expr context, in order to disambiguate from "less-than"
+// expressions.
+path_generic_args_with_colons
+: ident { $$ = mk_node("components", 1, $1); }
+| path_generic_args_with_colons MOD_SEP ident { $$ = ext_node($1, 1, $3); }
+| path_generic_args_with_colons MOD_SEP generic_args { $$ = ext_node($1, 1, $3); }
+;
+
+// the braces-delimited macro is a block_expr so it doesn't appear here
+macro_expr
+: path_expr '!' maybe_ident parens_delimited_token_trees { $$ = mk_node("MacroExpr", 3, $1, $3, $4); }
+| path_expr '!' maybe_ident brackets_delimited_token_trees { $$ = mk_node("MacroExpr", 3, $1, $3, $4); }
+;
+
+// Expression grammar restricted to non-block forms (see the trichotomy
+// comment above): used where a block expr would end the statement.
+nonblock_expr
+: lit { $$ = mk_node("ExprLit", 1, $1); }
+| %prec IDENT
+  path_expr { $$ = mk_node("ExprPath", 1, $1); }
+| SELF { $$ = mk_node("ExprPath", 1, mk_node("ident", 1, mk_atom("self"))); }
+| macro_expr { $$ = mk_node("ExprMac", 1, $1); }
+| path_expr '{' struct_expr_fields '}' { $$ = mk_node("ExprStruct", 2, $1, $3); }
+| nonblock_expr '.' path_generic_args_with_colons { $$ = mk_node("ExprField", 2, $1, $3); }
+// NOTE(review): the tuple-index below keeps only the base expr ($1);
+// the LIT_INTEGER index is not stored — confirm intended.
+| nonblock_expr '.' LIT_INTEGER { $$ = mk_node("ExprTupleIndex", 1, $1); }
+| nonblock_expr '[' index_expr ']' { $$ = mk_node("ExprIndex", 2, $1, $3); }
+| nonblock_expr '(' maybe_exprs ')' { $$ = mk_node("ExprCall", 2, $1, $3); }
+| '[' vec_expr ']' { $$ = mk_node("ExprVec", 1, $2); }
+| '(' maybe_exprs ')' { $$ = mk_node("ExprParen", 1, $2); }
+| CONTINUE { $$ = mk_node("ExprAgain", 0); }
+| CONTINUE lifetime { $$ = mk_node("ExprAgain", 1, $2); }
+| RETURN { $$ = mk_node("ExprRet", 0); }
+| RETURN expr { $$ = mk_node("ExprRet", 1, $2); }
+| BREAK { $$ = mk_node("ExprBreak", 0); }
+| BREAK lifetime { $$ = mk_node("ExprBreak", 1, $2); }
+| nonblock_expr '=' expr { $$ = mk_node("ExprAssign", 2, $1, $3); }
+| nonblock_expr SHLEQ expr { $$ = mk_node("ExprAssignShl", 2, $1, $3); }
+| nonblock_expr SHREQ expr { $$ = mk_node("ExprAssignShr", 2, $1, $3); }
+| nonblock_expr MINUSEQ expr { $$ = mk_node("ExprAssignSub", 2, $1, $3); }
+| nonblock_expr ANDEQ expr { $$ = mk_node("ExprAssignBitAnd", 2, $1, $3); }
+| nonblock_expr OREQ expr { $$ = mk_node("ExprAssignBitOr", 2, $1, $3); }
+| nonblock_expr PLUSEQ expr { $$ = mk_node("ExprAssignAdd", 2, $1, $3); }
+| nonblock_expr STAREQ expr { $$ = mk_node("ExprAssignMul", 2, $1, $3); }
+| nonblock_expr SLASHEQ expr { $$ = mk_node("ExprAssignDiv", 2, $1, $3); }
+| nonblock_expr CARETEQ expr { $$ = mk_node("ExprAssignBitXor", 2, $1, $3); }
+| nonblock_expr PERCENTEQ expr { $$ = mk_node("ExprAssignRem", 2, $1, $3); }
+| nonblock_expr OROR expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiOr"), $1, $3); }
+| nonblock_expr ANDAND expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiAnd"), $1, $3); }
+| nonblock_expr EQEQ expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiEq"), $1, $3); }
+| nonblock_expr NE expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiNe"), $1, $3); }
+| nonblock_expr '<' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiLt"), $1, $3); }
+| nonblock_expr '>' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiGt"), $1, $3); }
+| nonblock_expr LE expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiLe"), $1, $3); }
+| nonblock_expr GE expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiGe"), $1, $3); }
+| nonblock_expr '|' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitOr"), $1, $3); }
+| nonblock_expr '^' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitXor"), $1, $3); }
+| nonblock_expr '&' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitAnd"), $1, $3); }
+| nonblock_expr SHL expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiShl"), $1, $3); }
+| nonblock_expr SHR expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiShr"), $1, $3); }
+| nonblock_expr '+' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiAdd"), $1, $3); }
+| nonblock_expr '-' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiSub"), $1, $3); }
+| nonblock_expr '*' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiMul"), $1, $3); }
+| nonblock_expr '/' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiDiv"), $1, $3); }
+| nonblock_expr '%' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiRem"), $1, $3); }
+| nonblock_expr DOTDOT { $$ = mk_node("ExprRange", 2, $1, mk_none()); }
+| nonblock_expr DOTDOT expr { $$ = mk_node("ExprRange", 2, $1, $3); }
+| DOTDOT expr { $$ = mk_node("ExprRange", 2, mk_none(), $2); }
+| nonblock_expr AS ty { $$ = mk_node("ExprCast", 2, $1, $3); }
+| BOX nonparen_expr { $$ = mk_node("ExprBox", 1, $2); }
+| %prec BOXPLACE BOX '(' maybe_expr ')' nonblock_expr { $$ = mk_node("ExprBox", 2, $3, $5); }
+| nonblock_prefix_expr
+;
+
+expr
+: lit { $$ = mk_node("ExprLit", 1, $1); }
+| %prec IDENT
+ path_expr { $$ = mk_node("ExprPath", 1, $1); }
+| SELF { $$ = mk_node("ExprPath", 1, mk_node("ident", 1, mk_atom("self"))); }
+| macro_expr { $$ = mk_node("ExprMac", 1, $1); }
+| path_expr '{' struct_expr_fields '}' { $$ = mk_node("ExprStruct", 2, $1, $3); }
+| expr '.' path_generic_args_with_colons { $$ = mk_node("ExprField", 2, $1, $3); }
+| expr '.' LIT_INTEGER { $$ = mk_node("ExprTupleIndex", 1, $1); }
+| expr '[' index_expr ']' { $$ = mk_node("ExprIndex", 2, $1, $3); }
+| expr '(' maybe_exprs ')' { $$ = mk_node("ExprCall", 2, $1, $3); }
+| '(' maybe_exprs ')' { $$ = mk_node("ExprParen", 1, $2); }
+| '[' vec_expr ']' { $$ = mk_node("ExprVec", 1, $2); }
+| CONTINUE { $$ = mk_node("ExprAgain", 0); }
+| CONTINUE ident { $$ = mk_node("ExprAgain", 1, $2); }
+| RETURN { $$ = mk_node("ExprRet", 0); }
+| RETURN expr { $$ = mk_node("ExprRet", 1, $2); }
+| BREAK { $$ = mk_node("ExprBreak", 0); }
+| BREAK ident { $$ = mk_node("ExprBreak", 1, $2); }
+| expr '=' expr { $$ = mk_node("ExprAssign", 2, $1, $3); }
+| expr SHLEQ expr { $$ = mk_node("ExprAssignShl", 2, $1, $3); }
+| expr SHREQ expr { $$ = mk_node("ExprAssignShr", 2, $1, $3); }
+| expr MINUSEQ expr { $$ = mk_node("ExprAssignSub", 2, $1, $3); }
+| expr ANDEQ expr { $$ = mk_node("ExprAssignBitAnd", 2, $1, $3); }
+| expr OREQ expr { $$ = mk_node("ExprAssignBitOr", 2, $1, $3); }
+| expr PLUSEQ expr { $$ = mk_node("ExprAssignAdd", 2, $1, $3); }
+| expr STAREQ expr { $$ = mk_node("ExprAssignMul", 2, $1, $3); }
+| expr SLASHEQ expr { $$ = mk_node("ExprAssignDiv", 2, $1, $3); }
+| expr CARETEQ expr { $$ = mk_node("ExprAssignBitXor", 2, $1, $3); }
+| expr PERCENTEQ expr { $$ = mk_node("ExprAssignRem", 2, $1, $3); }
+| expr OROR expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiOr"), $1, $3); }
+| expr ANDAND expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiAnd"), $1, $3); }
+| expr EQEQ expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiEq"), $1, $3); }
+| expr NE expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiNe"), $1, $3); }
+| expr '<' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiLt"), $1, $3); }
+| expr '>' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiGt"), $1, $3); }
+| expr LE expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiLe"), $1, $3); }
+| expr GE expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiGe"), $1, $3); }
+| expr '|' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitOr"), $1, $3); }
+| expr '^' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitXor"), $1, $3); }
+| expr '&' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitAnd"), $1, $3); }
+| expr SHL expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiShl"), $1, $3); }
+| expr SHR expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiShr"), $1, $3); }
+| expr '+' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiAdd"), $1, $3); }
+| expr '-' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiSub"), $1, $3); }
+| expr '*' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiMul"), $1, $3); }
+| expr '/' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiDiv"), $1, $3); }
+| expr '%' expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiRem"), $1, $3); }
+| expr DOTDOT { $$ = mk_node("ExprRange", 2, $1, mk_none()); }
+| expr DOTDOT expr { $$ = mk_node("ExprRange", 2, $1, $3); }
+| DOTDOT expr { $$ = mk_node("ExprRange", 2, mk_none(), $2); }
+| expr AS ty { $$ = mk_node("ExprCast", 2, $1, $3); }
+| BOX nonparen_expr { $$ = mk_node("ExprBox", 1, $2); }
+| %prec BOXPLACE BOX '(' maybe_expr ')' expr { $$ = mk_node("ExprBox", 2, $3, $5); }
+| block_expr
+| block
+| nonblock_prefix_expr
+;
+
+nonparen_expr // like expr, but with no '(' ... ')' (ExprParen) production — used where a leading paren would be ambiguous, e.g. after BOX
+: lit { $$ = mk_node("ExprLit", 1, $1); }
+| %prec IDENT
+ path_expr { $$ = mk_node("ExprPath", 1, $1); }
+| SELF { $$ = mk_node("ExprPath", 1, mk_node("ident", 1, mk_atom("self"))); }
+| macro_expr { $$ = mk_node("ExprMac", 1, $1); }
+| path_expr '{' struct_expr_fields '}' { $$ = mk_node("ExprStruct", 2, $1, $3); }
+| nonparen_expr '.' path_generic_args_with_colons { $$ = mk_node("ExprField", 2, $1, $3); }
+| nonparen_expr '.' LIT_INTEGER { $$ = mk_node("ExprTupleIndex", 1, $1); }
+| nonparen_expr '[' index_expr ']' { $$ = mk_node("ExprIndex", 2, $1, $3); }
+| nonparen_expr '(' maybe_exprs ')' { $$ = mk_node("ExprCall", 2, $1, $3); }
+| '[' vec_expr ']' { $$ = mk_node("ExprVec", 1, $2); }
+| CONTINUE { $$ = mk_node("ExprAgain", 0); }
+| CONTINUE ident { $$ = mk_node("ExprAgain", 1, $2); }
+| RETURN { $$ = mk_node("ExprRet", 0); }
+| RETURN expr { $$ = mk_node("ExprRet", 1, $2); }
+| BREAK { $$ = mk_node("ExprBreak", 0); }
+| BREAK ident { $$ = mk_node("ExprBreak", 1, $2); }
+| nonparen_expr '=' nonparen_expr { $$ = mk_node("ExprAssign", 2, $1, $3); }
+| nonparen_expr SHLEQ nonparen_expr { $$ = mk_node("ExprAssignShl", 2, $1, $3); }
+| nonparen_expr SHREQ nonparen_expr { $$ = mk_node("ExprAssignShr", 2, $1, $3); }
+| nonparen_expr MINUSEQ nonparen_expr { $$ = mk_node("ExprAssignSub", 2, $1, $3); }
+| nonparen_expr ANDEQ nonparen_expr { $$ = mk_node("ExprAssignBitAnd", 2, $1, $3); }
+| nonparen_expr OREQ nonparen_expr { $$ = mk_node("ExprAssignBitOr", 2, $1, $3); }
+| nonparen_expr PLUSEQ nonparen_expr { $$ = mk_node("ExprAssignAdd", 2, $1, $3); }
+| nonparen_expr STAREQ nonparen_expr { $$ = mk_node("ExprAssignMul", 2, $1, $3); }
+| nonparen_expr SLASHEQ nonparen_expr { $$ = mk_node("ExprAssignDiv", 2, $1, $3); }
+| nonparen_expr CARETEQ nonparen_expr { $$ = mk_node("ExprAssignBitXor", 2, $1, $3); }
+| nonparen_expr PERCENTEQ nonparen_expr { $$ = mk_node("ExprAssignRem", 2, $1, $3); }
+| nonparen_expr OROR nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiOr"), $1, $3); }
+| nonparen_expr ANDAND nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiAnd"), $1, $3); }
+| nonparen_expr EQEQ nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiEq"), $1, $3); }
+| nonparen_expr NE nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiNe"), $1, $3); }
+| nonparen_expr '<' nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiLt"), $1, $3); }
+| nonparen_expr '>' nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiGt"), $1, $3); }
+| nonparen_expr LE nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiLe"), $1, $3); }
+| nonparen_expr GE nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiGe"), $1, $3); }
+| nonparen_expr '|' nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitOr"), $1, $3); }
+| nonparen_expr '^' nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitXor"), $1, $3); }
+| nonparen_expr '&' nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitAnd"), $1, $3); }
+| nonparen_expr SHL nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiShl"), $1, $3); }
+| nonparen_expr SHR nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiShr"), $1, $3); }
+| nonparen_expr '+' nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiAdd"), $1, $3); }
+| nonparen_expr '-' nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiSub"), $1, $3); }
+| nonparen_expr '*' nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiMul"), $1, $3); }
+| nonparen_expr '/' nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiDiv"), $1, $3); }
+| nonparen_expr '%' nonparen_expr { $$ = mk_node("ExprBinary", 3, mk_atom("BiRem"), $1, $3); }
+| nonparen_expr DOTDOT { $$ = mk_node("ExprRange", 2, $1, mk_none()); }
+| nonparen_expr DOTDOT nonparen_expr { $$ = mk_node("ExprRange", 2, $1, $3); }
+| DOTDOT nonparen_expr { $$ = mk_node("ExprRange", 2, mk_none(), $2); }
+| nonparen_expr AS ty { $$ = mk_node("ExprCast", 2, $1, $3); }
+| BOX nonparen_expr { $$ = mk_node("ExprBox", 1, $2); }
+| %prec BOXPLACE BOX '(' maybe_expr ')' expr { $$ = mk_node("ExprBox", 2, $3, $5); } // fix: child count was 1 but two children ($3, $5) are passed — matches the expr rule
+| block_expr
+| block
+| nonblock_prefix_expr
+;
+
+expr_norange // like expr, but without the DOTDOT range productions, so index_expr can handle '..' itself
+: lit { $$ = mk_node("ExprLit", 1, $1); }
+| %prec IDENT
+ path_expr { $$ = mk_node("ExprPath", 1, $1); }
+| SELF { $$ = mk_node("ExprPath", 1, mk_node("ident", 1, mk_atom("self"))); }
+| macro_expr { $$ = mk_node("ExprMac", 1, $1); }
+| path_expr '{' struct_expr_fields '}' { $$ = mk_node("ExprStruct", 2, $1, $3); }
+| expr_norange '.' path_generic_args_with_colons { $$ = mk_node("ExprField", 2, $1, $3); }
+| expr_norange '.' LIT_INTEGER { $$ = mk_node("ExprTupleIndex", 1, $1); }
+| expr_norange '[' index_expr ']' { $$ = mk_node("ExprIndex", 2, $1, $3); }
+| expr_norange '(' maybe_exprs ')' { $$ = mk_node("ExprCall", 2, $1, $3); }
+| '(' maybe_exprs ')' { $$ = mk_node("ExprParen", 1, $2); }
+| '[' vec_expr ']' { $$ = mk_node("ExprVec", 1, $2); }
+| CONTINUE { $$ = mk_node("ExprAgain", 0); }
+| CONTINUE ident { $$ = mk_node("ExprAgain", 1, $2); }
+| RETURN { $$ = mk_node("ExprRet", 0); }
+| RETURN expr { $$ = mk_node("ExprRet", 1, $2); }
+| BREAK { $$ = mk_node("ExprBreak", 0); }
+| BREAK ident { $$ = mk_node("ExprBreak", 1, $2); }
+| expr_norange '=' expr_norange { $$ = mk_node("ExprAssign", 2, $1, $3); }
+| expr_norange SHLEQ expr_norange { $$ = mk_node("ExprAssignShl", 2, $1, $3); }
+| expr_norange SHREQ expr_norange { $$ = mk_node("ExprAssignShr", 2, $1, $3); }
+| expr_norange MINUSEQ expr_norange { $$ = mk_node("ExprAssignSub", 2, $1, $3); }
+| expr_norange ANDEQ expr_norange { $$ = mk_node("ExprAssignBitAnd", 2, $1, $3); }
+| expr_norange OREQ expr_norange { $$ = mk_node("ExprAssignBitOr", 2, $1, $3); }
+| expr_norange PLUSEQ expr_norange { $$ = mk_node("ExprAssignAdd", 2, $1, $3); }
+| expr_norange STAREQ expr_norange { $$ = mk_node("ExprAssignMul", 2, $1, $3); }
+| expr_norange SLASHEQ expr_norange { $$ = mk_node("ExprAssignDiv", 2, $1, $3); }
+| expr_norange CARETEQ expr_norange { $$ = mk_node("ExprAssignBitXor", 2, $1, $3); }
+| expr_norange PERCENTEQ expr_norange { $$ = mk_node("ExprAssignRem", 2, $1, $3); }
+| expr_norange OROR expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiOr"), $1, $3); }
+| expr_norange ANDAND expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiAnd"), $1, $3); }
+| expr_norange EQEQ expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiEq"), $1, $3); }
+| expr_norange NE expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiNe"), $1, $3); }
+| expr_norange '<' expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiLt"), $1, $3); }
+| expr_norange '>' expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiGt"), $1, $3); }
+| expr_norange LE expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiLe"), $1, $3); }
+| expr_norange GE expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiGe"), $1, $3); }
+| expr_norange '|' expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitOr"), $1, $3); }
+| expr_norange '^' expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitXor"), $1, $3); }
+| expr_norange '&' expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitAnd"), $1, $3); }
+| expr_norange SHL expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiShl"), $1, $3); }
+| expr_norange SHR expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiShr"), $1, $3); }
+| expr_norange '+' expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiAdd"), $1, $3); }
+| expr_norange '-' expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiSub"), $1, $3); }
+| expr_norange '*' expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiMul"), $1, $3); }
+| expr_norange '/' expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiDiv"), $1, $3); }
+| expr_norange '%' expr_norange { $$ = mk_node("ExprBinary", 3, mk_atom("BiRem"), $1, $3); }
+| expr_norange AS ty { $$ = mk_node("ExprCast", 2, $1, $3); } // fix: was "Expr_NorangeCast", a search-and-replace artifact; every other rule emits "ExprCast"
+| BOX nonparen_expr { $$ = mk_node("ExprBox", 1, $2); }
+| %prec BOXPLACE BOX '(' maybe_expr ')' expr_norange { $$ = mk_node("ExprBox", 2, $3, $5); }
+| block_expr
+| block
+| nonblock_prefix_expr
+;
+
+expr_nostruct // like expr, but with no struct-literal (path_expr '{' ... '}') production — used where '{' must start a block (if/match/while/for heads)
+: lit { $$ = mk_node("ExprLit", 1, $1); }
+| %prec IDENT
+ path_expr { $$ = mk_node("ExprPath", 1, $1); }
+| SELF { $$ = mk_node("ExprPath", 1, mk_node("ident", 1, mk_atom("self"))); }
+| macro_expr { $$ = mk_node("ExprMac", 1, $1); }
+| expr_nostruct '.' path_generic_args_with_colons { $$ = mk_node("ExprField", 2, $1, $3); }
+| expr_nostruct '.' LIT_INTEGER { $$ = mk_node("ExprTupleIndex", 1, $1); }
+| expr_nostruct '[' index_expr ']' { $$ = mk_node("ExprIndex", 2, $1, $3); }
+| expr_nostruct '(' maybe_exprs ')' { $$ = mk_node("ExprCall", 2, $1, $3); }
+| '[' vec_expr ']' { $$ = mk_node("ExprVec", 1, $2); }
+| '(' maybe_exprs ')' { $$ = mk_node("ExprParen", 1, $2); }
+| CONTINUE { $$ = mk_node("ExprAgain", 0); }
+| CONTINUE ident { $$ = mk_node("ExprAgain", 1, $2); }
+| RETURN { $$ = mk_node("ExprRet", 0); }
+| RETURN expr { $$ = mk_node("ExprRet", 1, $2); }
+| BREAK { $$ = mk_node("ExprBreak", 0); }
+| BREAK ident { $$ = mk_node("ExprBreak", 1, $2); }
+| expr_nostruct '=' expr_nostruct { $$ = mk_node("ExprAssign", 2, $1, $3); }
+| expr_nostruct SHLEQ expr_nostruct { $$ = mk_node("ExprAssignShl", 2, $1, $3); }
+| expr_nostruct SHREQ expr_nostruct { $$ = mk_node("ExprAssignShr", 2, $1, $3); }
+| expr_nostruct MINUSEQ expr_nostruct { $$ = mk_node("ExprAssignSub", 2, $1, $3); }
+| expr_nostruct ANDEQ expr_nostruct { $$ = mk_node("ExprAssignBitAnd", 2, $1, $3); }
+| expr_nostruct OREQ expr_nostruct { $$ = mk_node("ExprAssignBitOr", 2, $1, $3); }
+| expr_nostruct PLUSEQ expr_nostruct { $$ = mk_node("ExprAssignAdd", 2, $1, $3); }
+| expr_nostruct STAREQ expr_nostruct { $$ = mk_node("ExprAssignMul", 2, $1, $3); }
+| expr_nostruct SLASHEQ expr_nostruct { $$ = mk_node("ExprAssignDiv", 2, $1, $3); }
+| expr_nostruct CARETEQ expr_nostruct { $$ = mk_node("ExprAssignBitXor", 2, $1, $3); }
+| expr_nostruct PERCENTEQ expr_nostruct { $$ = mk_node("ExprAssignRem", 2, $1, $3); }
+| expr_nostruct OROR expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiOr"), $1, $3); }
+| expr_nostruct ANDAND expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiAnd"), $1, $3); }
+| expr_nostruct EQEQ expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiEq"), $1, $3); }
+| expr_nostruct NE expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiNe"), $1, $3); }
+| expr_nostruct '<' expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiLt"), $1, $3); }
+| expr_nostruct '>' expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiGt"), $1, $3); }
+| expr_nostruct LE expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiLe"), $1, $3); }
+| expr_nostruct GE expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiGe"), $1, $3); }
+| expr_nostruct '|' expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitOr"), $1, $3); }
+| expr_nostruct '^' expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitXor"), $1, $3); }
+| expr_nostruct '&' expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiBitAnd"), $1, $3); }
+| expr_nostruct SHL expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiShl"), $1, $3); }
+| expr_nostruct SHR expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiShr"), $1, $3); }
+| expr_nostruct '+' expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiAdd"), $1, $3); }
+| expr_nostruct '-' expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiSub"), $1, $3); }
+| expr_nostruct '*' expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiMul"), $1, $3); }
+| expr_nostruct '/' expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiDiv"), $1, $3); }
+| expr_nostruct '%' expr_nostruct { $$ = mk_node("ExprBinary", 3, mk_atom("BiRem"), $1, $3); }
+| expr_nostruct DOTDOT { $$ = mk_node("ExprRange", 2, $1, mk_none()); }
+| expr_nostruct DOTDOT expr_nostruct { $$ = mk_node("ExprRange", 2, $1, $3); }
+| DOTDOT expr_nostruct { $$ = mk_node("ExprRange", 2, mk_none(), $2); }
+| expr_nostruct AS ty { $$ = mk_node("ExprCast", 2, $1, $3); }
+| BOX nonparen_expr { $$ = mk_node("ExprBox", 1, $2); }
+| %prec BOXPLACE BOX '(' maybe_expr ')' expr_nostruct { $$ = mk_node("ExprBox", 2, $3, $5); } // fix: child count was 1 but two children ($3, $5) are passed — matches the expr rule
+| block_expr
+| block
+| nonblock_prefix_expr_nostruct
+;
+
+nonblock_prefix_expr_nostruct
+: '-' expr_nostruct { $$ = mk_node("ExprUnary", 2, mk_atom("UnNeg"), $2); }
+| '!' expr_nostruct { $$ = mk_node("ExprUnary", 2, mk_atom("UnNot"), $2); }
+| '*' expr_nostruct { $$ = mk_node("ExprUnary", 2, mk_atom("UnDeref"), $2); }
+| '&' maybe_mut expr_nostruct { $$ = mk_node("ExprAddrOf", 2, $2, $3); }
+| ANDAND maybe_mut expr_nostruct { $$ = mk_node("ExprAddrOf", 1, mk_node("ExprAddrOf", 2, $2, $3)); }
+| lambda_expr_nostruct
+| MOVE lambda_expr_nostruct { $$ = $2; }
+| proc_expr_nostruct
+;
+
+nonblock_prefix_expr
+: '-' expr { $$ = mk_node("ExprUnary", 2, mk_atom("UnNeg"), $2); }
+| '!' expr { $$ = mk_node("ExprUnary", 2, mk_atom("UnNot"), $2); }
+| '*' expr { $$ = mk_node("ExprUnary", 2, mk_atom("UnDeref"), $2); }
+| '&' maybe_mut expr { $$ = mk_node("ExprAddrOf", 2, $2, $3); }
+| ANDAND maybe_mut expr { $$ = mk_node("ExprAddrOf", 1, mk_node("ExprAddrOf", 2, $2, $3)); }
+| lambda_expr
+| MOVE lambda_expr { $$ = $2; }
+| proc_expr
+;
+
+lambda_expr
+: %prec LAMBDA
+ OROR ret_ty expr { $$ = mk_node("ExprFnBlock", 3, mk_none(), $2, $3); }
+| %prec LAMBDA
+ '|' maybe_unboxed_closure_kind '|' ret_ty expr { $$ = mk_node("ExprFnBlock", 3, mk_none(), $4, $5); }
+| %prec LAMBDA
+ '|' inferrable_params '|' ret_ty expr { $$ = mk_node("ExprFnBlock", 3, $2, $4, $5); }
+| %prec LAMBDA
+ '|' '&' maybe_mut ':' inferrable_params '|' ret_ty expr { $$ = mk_node("ExprFnBlock", 3, $5, $7, $8); }
+| %prec LAMBDA
+ '|' ':' inferrable_params '|' ret_ty expr { $$ = mk_node("ExprFnBlock", 3, $3, $5, $6); }
+;
+
+lambda_expr_nostruct
+: %prec LAMBDA
+ OROR expr_nostruct { $$ = mk_node("ExprFnBlock", 2, mk_none(), $2); }
+| %prec LAMBDA
+ '|' maybe_unboxed_closure_kind '|' expr_nostruct { $$ = mk_node("ExprFnBlock", 2, mk_none(), $4); }
+| %prec LAMBDA
+ '|' inferrable_params '|' expr_nostruct { $$ = mk_node("ExprFnBlock", 2, $2, $4); }
+| %prec LAMBDA
+ '|' '&' maybe_mut ':' inferrable_params '|' expr_nostruct { $$ = mk_node("ExprFnBlock", 2, $5, $7); }
+| %prec LAMBDA
+ '|' ':' inferrable_params '|' expr_nostruct { $$ = mk_node("ExprFnBlock", 2, $3, $5); }
+
+;
+
+proc_expr
+: %prec LAMBDA
+ PROC '(' ')' expr { $$ = mk_node("ExprProc", 2, mk_none(), $4); }
+| %prec LAMBDA
+ PROC '(' inferrable_params ')' expr { $$ = mk_node("ExprProc", 2, $3, $5); }
+;
+
+proc_expr_nostruct
+: %prec LAMBDA
+ PROC '(' ')' expr_nostruct { $$ = mk_node("ExprProc", 2, mk_none(), $4); }
+| %prec LAMBDA
+ PROC '(' inferrable_params ')' expr_nostruct { $$ = mk_node("ExprProc", 2, $3, $5); }
+;
+
+vec_expr // contents of a '[' ... ']' vector literal
+: maybe_exprs // element list: [a, b, c] (possibly empty)
+| exprs ';' expr { $$ = mk_node("VecRepeat", 2, $1, $3); } // repeat form: [elem; count]
+;
+
+index_expr // expression inside a '[' ... ']' index; uses expr_norange so DOTDOT can be matched explicitly here
+: expr_norange { $$ = mk_node("Index", 1, $1); }
+| expr_norange DOTDOT { $$ = mk_node("SliceToEnd", 1, $1); } // a[x..]
+| DOTDOT expr_norange { $$ = mk_node("SliceFromBeginning", 1, $2); } // a[..y]
+| expr_norange DOTDOT expr_norange { $$ = mk_node("Slice", 2, $1, $3); } // a[x..y]
+| %empty { $$ = mk_none(); } // a[]
+;
+
+struct_expr_fields
+: field_inits
+| field_inits ','
+| maybe_field_inits default_field_init { $$ = ext_node($1, 1, $2); }
+;
+
+maybe_field_inits
+: field_inits
+| field_inits ','
+| %empty { $$ = mk_none(); }
+;
+
+field_inits
+: field_init { $$ = mk_node("FieldInits", 1, $1); }
+| field_inits ',' field_init { $$ = ext_node($1, 1, $3); }
+;
+
+field_init
+: ident ':' expr { $$ = mk_node("FieldInit", 2, $1, $3); }
+;
+
+default_field_init
+: DOTDOT expr { $$ = mk_node("DefaultFieldInit", 1, $2); }
+;
+
+block_expr
+: expr_match
+| expr_if
+| expr_if_let
+| expr_while
+| expr_while_let
+| expr_loop
+| expr_for
+| UNSAFE block { $$ = mk_node("UnsafeBlock", 1, $2); }
+| path_expr '!' maybe_ident braces_delimited_token_trees { $$ = mk_node("Macro", 3, $1, $3, $4); }
+;
+
+full_block_expr // a block-like expression, optionally followed by field / tuple-index access
+: block_expr
+| full_block_expr '.' path_generic_args_with_colons { $$ = mk_node("ExprField", 2, $1, $3); }
+| full_block_expr '.' LIT_INTEGER { $$ = mk_node("ExprTupleIndex", 1, $1); }
+;
+
+expr_match // MATCH head uses expr_nostruct so '{' unambiguously opens the arm list
+: MATCH expr_nostruct '{' '}' { $$ = mk_node("ExprMatch", 1, $2); }
+| MATCH expr_nostruct '{' match_clauses '}' { $$ = mk_node("ExprMatch", 2, $2, $4); }
+| MATCH expr_nostruct '{' match_clauses nonblock_match_clause '}' { $$ = mk_node("ExprMatch", 2, $2, ext_node($4, 1, $5)); } // last arm may omit trailing ','
+| MATCH expr_nostruct '{' nonblock_match_clause '}' { $$ = mk_node("ExprMatch", 2, $2, mk_node("Arms", 1, $4)); } // single arm without ','
+;
+
+match_clauses
+: match_clause { $$ = mk_node("Arms", 1, $1); }
+| match_clauses match_clause { $$ = ext_node($1, 1, $2); }
+;
+
+match_clause
+: nonblock_match_clause ','
+| block_match_clause
+| block_match_clause ','
+;
+
+nonblock_match_clause
+: maybe_outer_attrs pats_or maybe_guard FAT_ARROW nonblock_expr { $$ = mk_node("Arm", 4, $1, $2, $3, $5); }
+| maybe_outer_attrs pats_or maybe_guard FAT_ARROW full_block_expr { $$ = mk_node("Arm", 4, $1, $2, $3, $5); }
+;
+
+block_match_clause
+: maybe_outer_attrs pats_or maybe_guard FAT_ARROW block { $$ = mk_node("Arm", 4, $1, $2, $3, $5); }
+;
+
+maybe_guard
+: IF expr_nostruct { $$ = $2; }
+| %empty { $$ = mk_none(); }
+;
+
+expr_if
+: IF expr_nostruct block { $$ = mk_node("ExprIf", 2, $2, $3); }
+| IF expr_nostruct block ELSE block_or_if { $$ = mk_node("ExprIf", 3, $2, $3, $5); }
+;
+
+expr_if_let
+: IF LET pat '=' expr_nostruct block { $$ = mk_node("ExprIfLet", 3, $3, $5, $6); }
+| IF LET pat '=' expr_nostruct block ELSE block_or_if { $$ = mk_node("ExprIfLet", 4, $3, $5, $6, $8); }
+;
+
+block_or_if
+: block
+| expr_if
+| expr_if_let
+;
+
+expr_while
+: maybe_label WHILE expr_nostruct block { $$ = mk_node("ExprWhile", 3, $1, $3, $4); }
+;
+
+expr_while_let
+: maybe_label WHILE LET pat '=' expr_nostruct block { $$ = mk_node("ExprWhileLet", 4, $1, $4, $6, $7); }
+;
+
+expr_loop
+: maybe_label LOOP block { $$ = mk_node("ExprLoop", 2, $1, $3); }
+;
+
+expr_for
+: maybe_label FOR pat IN expr_nostruct block { $$ = mk_node("ExprForLoop", 4, $1, $3, $5, $6); }
+;
+
+maybe_label
+: lifetime ':'
+| %empty { $$ = mk_none(); }
+;
+
+let // local binding: LET pat [: ty] [= init] ';'
+: LET pat maybe_ty_ascription maybe_init_expr ';' { $$ = mk_node("DeclLocal", 3, $2, $3, $4); }
+;
+
+////////////////////////////////////////////////////////////////////////
+// Part 5: Macros and misc. rules
+////////////////////////////////////////////////////////////////////////
+
+lit
+: LIT_BYTE { $$ = mk_node("LitByte", 1, mk_atom(yytext)); }
+| LIT_CHAR { $$ = mk_node("LitChar", 1, mk_atom(yytext)); }
+| LIT_INTEGER { $$ = mk_node("LitInteger", 1, mk_atom(yytext)); }
+| LIT_FLOAT { $$ = mk_node("LitFloat", 1, mk_atom(yytext)); }
+| TRUE { $$ = mk_node("LitBool", 1, mk_atom(yytext)); }
+| FALSE { $$ = mk_node("LitBool", 1, mk_atom(yytext)); }
+| str
+;
+
+str
+: LIT_STR { $$ = mk_node("LitStr", 1, mk_atom(yytext), mk_atom("CookedStr")); }
+| LIT_STR_RAW { $$ = mk_node("LitStr", 1, mk_atom(yytext), mk_atom("RawStr")); }
+| LIT_BINARY { $$ = mk_node("LitBinary", 1, mk_atom(yytext), mk_atom("BinaryStr")); }
+| LIT_BINARY_RAW { $$ = mk_node("LitBinary", 1, mk_atom(yytext), mk_atom("RawBinaryStr")); }
+;
+
+maybe_ident
+: %empty { $$ = mk_none(); }
+| ident
+;
+
+ident
+: IDENT { $$ = mk_node("ident", 1, mk_atom(yytext)); }
+;
+
+unpaired_token
+: SHL { $$ = mk_atom(yytext); }
+| SHR { $$ = mk_atom(yytext); }
+| LE { $$ = mk_atom(yytext); }
+| EQEQ { $$ = mk_atom(yytext); }
+| NE { $$ = mk_atom(yytext); }
+| GE { $$ = mk_atom(yytext); }
+| ANDAND { $$ = mk_atom(yytext); }
+| OROR { $$ = mk_atom(yytext); }
+| SHLEQ { $$ = mk_atom(yytext); }
+| SHREQ { $$ = mk_atom(yytext); }
+| MINUSEQ { $$ = mk_atom(yytext); }
+| ANDEQ { $$ = mk_atom(yytext); }
+| OREQ { $$ = mk_atom(yytext); }
+| PLUSEQ { $$ = mk_atom(yytext); }
+| STAREQ { $$ = mk_atom(yytext); }
+| SLASHEQ { $$ = mk_atom(yytext); }
+| CARETEQ { $$ = mk_atom(yytext); }
+| PERCENTEQ { $$ = mk_atom(yytext); }
+| DOTDOT { $$ = mk_atom(yytext); }
+| DOTDOTDOT { $$ = mk_atom(yytext); }
+| MOD_SEP { $$ = mk_atom(yytext); }
+| RARROW { $$ = mk_atom(yytext); }
+| FAT_ARROW { $$ = mk_atom(yytext); }
+| LIT_BYTE { $$ = mk_atom(yytext); }
+| LIT_CHAR { $$ = mk_atom(yytext); }
+| LIT_INTEGER { $$ = mk_atom(yytext); }
+| LIT_FLOAT { $$ = mk_atom(yytext); }
+| LIT_STR { $$ = mk_atom(yytext); }
+| LIT_STR_RAW { $$ = mk_atom(yytext); }
+| LIT_BINARY { $$ = mk_atom(yytext); }
+| LIT_BINARY_RAW { $$ = mk_atom(yytext); }
+| IDENT { $$ = mk_atom(yytext); }
+| UNDERSCORE { $$ = mk_atom(yytext); }
+| LIFETIME { $$ = mk_atom(yytext); }
+| SELF { $$ = mk_atom(yytext); }
+| STATIC { $$ = mk_atom(yytext); }
+| AS { $$ = mk_atom(yytext); }
+| BREAK { $$ = mk_atom(yytext); }
+| CRATE { $$ = mk_atom(yytext); }
+| ELSE { $$ = mk_atom(yytext); }
+| ENUM { $$ = mk_atom(yytext); }
+| EXTERN { $$ = mk_atom(yytext); }
+| FALSE { $$ = mk_atom(yytext); }
+| FN { $$ = mk_atom(yytext); }
+| FOR { $$ = mk_atom(yytext); }
+| IF { $$ = mk_atom(yytext); }
+| IMPL { $$ = mk_atom(yytext); }
+| IN { $$ = mk_atom(yytext); }
+| LET { $$ = mk_atom(yytext); }
+| LOOP { $$ = mk_atom(yytext); }
+| MATCH { $$ = mk_atom(yytext); }
+| MOD { $$ = mk_atom(yytext); }
+| MOVE { $$ = mk_atom(yytext); }
+| MUT { $$ = mk_atom(yytext); }
+| PRIV { $$ = mk_atom(yytext); }
+| PUB { $$ = mk_atom(yytext); }
+| REF { $$ = mk_atom(yytext); }
+| RETURN { $$ = mk_atom(yytext); }
+| STRUCT { $$ = mk_atom(yytext); }
+| TRUE { $$ = mk_atom(yytext); }
+| TRAIT { $$ = mk_atom(yytext); }
+| TYPE { $$ = mk_atom(yytext); }
+| UNSAFE { $$ = mk_atom(yytext); }
+| USE { $$ = mk_atom(yytext); }
+| WHILE { $$ = mk_atom(yytext); }
+| CONTINUE { $$ = mk_atom(yytext); }
+| PROC { $$ = mk_atom(yytext); }
+| BOX { $$ = mk_atom(yytext); }
+| CONST { $$ = mk_atom(yytext); }
+| WHERE { $$ = mk_atom(yytext); }
+| TYPEOF { $$ = mk_atom(yytext); }
+| INNER_DOC_COMMENT { $$ = mk_atom(yytext); }
+| OUTER_DOC_COMMENT { $$ = mk_atom(yytext); }
+| SHEBANG { $$ = mk_atom(yytext); }
+| STATIC_LIFETIME { $$ = mk_atom(yytext); }
+| ';' { $$ = mk_atom(yytext); }
+| ',' { $$ = mk_atom(yytext); }
+| '.' { $$ = mk_atom(yytext); }
+| '@' { $$ = mk_atom(yytext); }
+| '#' { $$ = mk_atom(yytext); }
+| '~' { $$ = mk_atom(yytext); }
+| ':' { $$ = mk_atom(yytext); }
+| '$' { $$ = mk_atom(yytext); }
+| '=' { $$ = mk_atom(yytext); }
+| '?' { $$ = mk_atom(yytext); }
+| '!' { $$ = mk_atom(yytext); }
+| '<' { $$ = mk_atom(yytext); }
+| '>' { $$ = mk_atom(yytext); }
+| '-' { $$ = mk_atom(yytext); }
+| '&' { $$ = mk_atom(yytext); }
+| '|' { $$ = mk_atom(yytext); }
+| '+' { $$ = mk_atom(yytext); }
+| '*' { $$ = mk_atom(yytext); }
+| '/' { $$ = mk_atom(yytext); }
+| '^' { $$ = mk_atom(yytext); }
+| '%' { $$ = mk_atom(yytext); }
+;
+
+token_trees
+: %empty { $$ = mk_node("TokenTrees", 0); }
+| token_trees token_tree { $$ = ext_node($1, 1, $2); }
+;
+
+token_tree
+: delimited_token_trees
+| unpaired_token { $$ = mk_node("TTTok", 1, $1); }
+;
+
+delimited_token_trees
+: parens_delimited_token_trees
+| braces_delimited_token_trees
+| brackets_delimited_token_trees
+;
+
+parens_delimited_token_trees
+: '(' token_trees ')'
+{
+ $$ = mk_node("TTDelim", 3,
+ mk_node("TTTok", 1, mk_atom("(")),
+ $2,
+ mk_node("TTTok", 1, mk_atom(")")));
+}
+;
+
+braces_delimited_token_trees
+: '{' token_trees '}'
+{
+ $$ = mk_node("TTDelim", 3,
+ mk_node("TTTok", 1, mk_atom("{")),
+ $2,
+ mk_node("TTTok", 1, mk_atom("}")));
+}
+;
+
+brackets_delimited_token_trees
+: '[' token_trees ']'
+{
+ $$ = mk_node("TTDelim", 3,
+ mk_node("TTTok", 1, mk_atom("[")),
+ $2,
+ mk_node("TTTok", 1, mk_atom("]")));
+}
+;
--- /dev/null
+#!/usr/bin/env python
+#
+# Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+import sys
+
+import os
+import subprocess
+import argparse
+
+# usage: testparser.py [-h] [-p PARSER [PARSER ...]] -s SOURCE_DIR
+
+# Parsers should read from stdin and return exit status 0 for a
+# successful parse, and nonzero for an unsuccessful parse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-p', '--parser', nargs='+')
+parser.add_argument('-s', '--source-dir', nargs=1, required=True)
+args = parser.parse_args(sys.argv[1:])
+
+total = 0
+ok = {}
+bad = {}
+for parser in args.parser:
+ ok[parser] = 0
+ bad[parser] = []
+devnull = open(os.devnull, 'w')
+print "\n"
+
+for base, dirs, files in os.walk(args.source_dir[0]):
+ for f in filter(lambda p: p.endswith('.rs'), files):
+ p = os.path.join(base, f)
+ compile_fail = 'compile-fail' in p
+ ignore = any('ignore-test' in line or 'ignore-lexer-test' in line
+ for line in open(p).readlines())
+ if compile_fail or ignore:
+ continue
+ total += 1
+ for parser in args.parser:
+ if subprocess.call(parser, stdin=open(p), stderr=subprocess.STDOUT, stdout=devnull) == 0:
+ ok[parser] += 1
+ else:
+ bad[parser].append(p)
+ parser_stats = ', '.join(['{}: {}'.format(parser, ok[parser]) for parser in args.parser])
+ sys.stdout.write("\033[K\r total: {}, {}, scanned {}"
+ .format(total, os.path.relpath(parser_stats), os.path.relpath(p)))
+
+devnull.close()
+
+print "\n"
+
+for parser in args.parser:
+ filename = os.path.basename(parser) + '.bad'
+ print("writing {} files that failed to parse with {} to {}".format(len(bad[parser]), parser, filename))
+ with open(filename, "w") as f:
+ for p in bad[parser]:
+ f.write(p)
+ f.write("\n")
--- /dev/null
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+enum Token {
+ SHL = 257, // Parser generators reserve 0-256 for char literals
+ SHR,
+ LE,
+ EQEQ,
+ NE,
+ GE,
+ ANDAND,
+ OROR,
+ SHLEQ,
+ SHREQ,
+ MINUSEQ,
+ ANDEQ,
+ OREQ,
+ PLUSEQ,
+ STAREQ,
+ SLASHEQ,
+ CARETEQ,
+ PERCENTEQ,
+ DOTDOT,
+ DOTDOTDOT,
+ MOD_SEP,
+ RARROW,
+ FAT_ARROW,
+ LIT_BYTE,
+ LIT_CHAR,
+ LIT_INTEGER,
+ LIT_FLOAT,
+ LIT_STR,
+ LIT_STR_RAW,
+ LIT_BINARY,
+ LIT_BINARY_RAW,
+ IDENT,
+ UNDERSCORE,
+ LIFETIME,
+
+ // keywords
+ SELF,
+ STATIC,
+ AS,
+ BREAK,
+ CRATE,
+ ELSE,
+ ENUM,
+ EXTERN,
+ FALSE,
+ FN,
+ FOR,
+ IF,
+ IMPL,
+ IN,
+ LET,
+ LOOP,
+ MATCH,
+ MOD,
+ MOVE,
+ MUT,
+ PRIV,
+ PUB,
+ REF,
+ RETURN,
+ STRUCT,
+ TRUE,
+ TRAIT,
+ TYPE,
+ UNSAFE,
+ USE,
+ WHILE,
+ CONTINUE,
+ PROC,
+ BOX,
+ CONST,
+ WHERE,
+ TYPEOF,
+ INNER_DOC_COMMENT,
+ OUTER_DOC_COMMENT,
+
+ SHEBANG,
+ SHEBANG_LINE,
+ STATIC_LIFETIME
+};
extern crate syntax;
extern crate rustc;
-extern crate regex;
-
#[macro_use]
extern crate log;
use std::collections::HashMap;
use std::io::File;
-use regex::Regex;
use syntax::parse;
use syntax::parse::lexer;
}
fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>) -> TokenAndSpan {
- let re = Regex::new(
- r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]"
- ).unwrap();
-
- let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
- let start = m.name("start").unwrap_or("");
- let end = m.name("end").unwrap_or("");
- let toknum = m.name("toknum").unwrap_or("");
- let content = m.name("content").unwrap_or("");
+ // old regex:
+ // \[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]
+ let start = s.find_str("[@").unwrap();
+ let comma = start + s[start..].find_str(",").unwrap();
+ let colon = comma + s[comma..].find_str(":").unwrap();
+ let content_start = colon + s[colon..].find_str("='").unwrap();
+ let content_end = content_start + s[content_start..].find_str("',<").unwrap();
+ let toknum_end = content_end + s[content_end..].find_str(">,").unwrap();
+
+ let start = &s[comma + 1 .. colon];
+ let end = &s[colon + 1 .. content_start];
+ let content = &s[content_start + 2 .. content_end];
+ let toknum = &s[content_end + 3 .. toknum_end];
let proto_tok = tokens.get(toknum).expect(format!("didn't find token {:?} in the map",
toknum).as_slice());
/// ```
/// use std::sync::atomic::AtomicUsize;
///
- /// let atomic_forty_two = AtomicUsize::new(42u);
+ /// let atomic_forty_two = AtomicUsize::new(42);
/// ```
#[inline]
pub fn new(v: usize) -> AtomicUsize {
/// ```
/// use std::sync::atomic::AtomicPtr;
///
- /// let ptr = &mut 5i;
+ /// let ptr = &mut 5;
/// let atomic_ptr = AtomicPtr::new(ptr);
/// ```
#[inline]
/// ```
/// use std::sync::atomic::{AtomicPtr, Ordering};
///
- /// let ptr = &mut 5i;
+ /// let ptr = &mut 5;
/// let some_ptr = AtomicPtr::new(ptr);
///
/// let value = some_ptr.load(Ordering::Relaxed);
/// ```
/// use std::sync::atomic::{AtomicPtr, Ordering};
///
- /// let ptr = &mut 5i;
+ /// let ptr = &mut 5;
/// let some_ptr = AtomicPtr::new(ptr);
///
- /// let other_ptr = &mut 10i;
+ /// let other_ptr = &mut 10;
///
/// some_ptr.store(other_ptr, Ordering::Relaxed);
/// ```
/// ```
/// use std::sync::atomic::{AtomicPtr, Ordering};
///
- /// let ptr = &mut 5i;
+ /// let ptr = &mut 5;
/// let some_ptr = AtomicPtr::new(ptr);
///
- /// let other_ptr = &mut 10i;
+ /// let other_ptr = &mut 10;
///
/// let value = some_ptr.swap(other_ptr, Ordering::Relaxed);
/// ```
/// ```
/// use std::sync::atomic::{AtomicPtr, Ordering};
///
- /// let ptr = &mut 5i;
+ /// let ptr = &mut 5;
/// let some_ptr = AtomicPtr::new(ptr);
///
- /// let other_ptr = &mut 10i;
- /// let another_ptr = &mut 10i;
+ /// let other_ptr = &mut 10;
+ /// let another_ptr = &mut 10;
///
/// let value = some_ptr.compare_and_swap(other_ptr, another_ptr, Ordering::Relaxed);
/// ```
//!
//! fn main() {
//! let shared_map: Rc<RefCell<_>> = Rc::new(RefCell::new(HashMap::new()));
-//! shared_map.borrow_mut().insert("africa", 92388i);
-//! shared_map.borrow_mut().insert("kyoto", 11837i);
-//! shared_map.borrow_mut().insert("piccadilly", 11826i);
-//! shared_map.borrow_mut().insert("marbles", 38i);
+//! shared_map.borrow_mut().insert("africa", 92388);
+//! shared_map.borrow_mut().insert("kyoto", 11837);
+//! shared_map.borrow_mut().insert("piccadilly", 11826);
+//! shared_map.borrow_mut().insert("marbles", 38);
//! }
//! ```
//!
if num < 10 {
Some(transmute(('0' as uint + num) as u32))
} else {
- Some(transmute(('a' as uint + num - 10u) as u32))
+ Some(transmute(('a' as uint + num - 10) as u32))
}
}
} else {
}
let val = match self {
'0' ... '9' => self as uint - ('0' as uint),
- 'a' ... 'z' => self as uint + 10u - ('a' as uint),
- 'A' ... 'Z' => self as uint + 10u - ('A' as uint),
+ 'a' ... 'z' => self as uint + 10 - ('a' as uint),
+ 'A' ... 'Z' => self as uint + 10 - ('A' as uint),
_ => return None,
};
if val < radix { Some(val) }
fn len_utf8(self) -> uint {
let code = self as u32;
match () {
- _ if code < MAX_ONE_B => 1u,
- _ if code < MAX_TWO_B => 2u,
- _ if code < MAX_THREE_B => 3u,
- _ => 4u,
+ _ if code < MAX_ONE_B => 1,
+ _ if code < MAX_TWO_B => 2,
+ _ if code < MAX_THREE_B => 3,
+ _ => 4,
}
}
#[inline]
#[unstable = "pending decision about Iterator/Writer/Reader"]
fn encode_utf8(self, dst: &mut [u8]) -> Option<uint> {
- // Marked #[inline] to allow llvm optimizing it away
- let code = self as u32;
- if code < MAX_ONE_B && dst.len() >= 1 {
- dst[0] = code as u8;
- Some(1)
- } else if code < MAX_TWO_B && dst.len() >= 2 {
- dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B;
- dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT;
- Some(2)
- } else if code < MAX_THREE_B && dst.len() >= 3 {
- dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B;
- dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
- dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT;
- Some(3)
- } else if dst.len() >= 4 {
- dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B;
- dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT;
- dst[2] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
- dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT;
- Some(4)
- } else {
- None
- }
+ encode_utf8_raw(self as u32, dst)
}
#[inline]
#[unstable = "pending decision about Iterator/Writer/Reader"]
fn encode_utf16(self, dst: &mut [u16]) -> Option<uint> {
- // Marked #[inline] to allow llvm optimizing it away
- let mut ch = self as u32;
- if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 {
- // The BMP falls through (assuming non-surrogate, as it should)
- dst[0] = ch as u16;
- Some(1)
- } else if dst.len() >= 2 {
- // Supplementary planes break into surrogates.
- ch -= 0x1_0000_u32;
- dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
- dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
- Some(2)
- } else {
- None
- }
+ encode_utf16_raw(self as u32, dst)
+ }
+}
+
+/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
+/// and then returns the number of bytes written.
+///
+/// If the buffer is not large enough, nothing will be written into it
+/// and a `None` will be returned.
+#[inline]
+#[unstable]
+pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> Option<uint> {
+ // Marked #[inline] to allow llvm optimizing it away
+ if code < MAX_ONE_B && dst.len() >= 1 {
+ dst[0] = code as u8;
+ Some(1)
+ } else if code < MAX_TWO_B && dst.len() >= 2 {
+ dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B;
+ dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT;
+ Some(2)
+ } else if code < MAX_THREE_B && dst.len() >= 3 {
+ dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B;
+ dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
+ dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT;
+ Some(3)
+ } else if dst.len() >= 4 {
+ dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B;
+ dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT;
+ dst[2] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
+ dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT;
+ Some(4)
+ } else {
+ None
+ }
+}
+
+/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
+/// and then returns the number of `u16`s written.
+///
+/// If the buffer is not large enough, nothing will be written into it
+/// and a `None` will be returned.
+#[inline]
+#[unstable]
+pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<uint> {
+ // Marked #[inline] to allow llvm optimizing it away
+ if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 {
+ // The BMP falls through (assuming non-surrogate, as it should)
+ dst[0] = ch as u16;
+ Some(1)
+ } else if dst.len() >= 2 {
+ // Supplementary planes break into surrogates.
+ ch -= 0x1_0000_u32;
+ dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
+ dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
+ Some(2)
+ } else {
+ None
}
}
Some('u')
}
EscapeUnicodeState::LeftBrace => {
- let mut n = 0u;
+ let mut n = 0;
while (self.c as u32) >> (4 * (n + 1)) != 0 {
n += 1;
}
pub enum Ordering {
/// An ordering where a compared value is less [than another].
#[stable]
- Less = -1i,
+ Less = -1,
/// An ordering where a compared value is equal [to another].
#[stable]
- Equal = 0i,
+ Equal = 0,
/// An ordering where a compared value is greater [than another].
#[stable]
- Greater = 1i,
+ Greater = 1,
}
impl Ordering {
/// assert_eq!(Equal.reverse(), Equal);
/// assert_eq!(Greater.reverse(), Less);
///
- /// let mut data: &mut [_] = &mut [2u, 10, 5, 8];
+ /// let mut data: &mut [_] = &mut [2, 10, 5, 8];
///
/// // sort the array from largest to smallest.
/// data.sort_by(|a, b| a.cmp(b).reverse());
///
- /// let b: &mut [_] = &mut [10u, 8, 5, 2];
+ /// let b: &mut [_] = &mut [10, 8, 5, 2];
/// assert!(data == b);
/// ```
#[inline]
/// ```
/// use std::cmp::Ordering::{Less, Equal, Greater};
///
- /// assert_eq!( 5u.cmp(&10), Less); // because 5 < 10
- /// assert_eq!(10u.cmp(&5), Greater); // because 10 > 5
- /// assert_eq!( 5u.cmp(&5), Equal); // because 5 == 5
+ /// assert_eq!( 5.cmp(&10), Less); // because 5 < 10
+ /// assert_eq!(10.cmp(&5), Greater); // because 10 > 5
+ /// assert_eq!( 5.cmp(&5), Equal); // because 5 == 5
/// ```
#[stable]
fn cmp(&self, other: &Self) -> Ordering;
default_impl! { bool, false }
default_impl! { char, '\x00' }
-default_impl! { uint, 0u }
-default_impl! { u8, 0u8 }
-default_impl! { u16, 0u16 }
-default_impl! { u32, 0u32 }
-default_impl! { u64, 0u64 }
+default_impl! { uint, 0 }
+default_impl! { u8, 0 }
+default_impl! { u16, 0 }
+default_impl! { u32, 0 }
+default_impl! { u64, 0 }
-default_impl! { int, 0i }
-default_impl! { i8, 0i8 }
-default_impl! { i16, 0i16 }
-default_impl! { i32, 0i32 }
-default_impl! { i64, 0i64 }
+default_impl! { int, 0 }
+default_impl! { i8, 0 }
+default_impl! { i16, 0 }
+default_impl! { i32, 0 }
+default_impl! { i64, 0 }
default_impl! { f32, 0.0f32 }
default_impl! { f64, 0.0f64 }
SignNeg
}
-static DIGIT_E_RADIX: uint = ('e' as uint) - ('a' as uint) + 11u;
+static DIGIT_E_RADIX: uint = ('e' as uint) - ('a' as uint) + 11;
/// Converts a number to its string representation as a byte vector.
/// This is meant to be a common base implementation for all numeric string
if deccum != _0 || (limit_digits && exact && digit_count > 0) {
buf[end] = b'.';
end += 1;
- let mut dig = 0u;
+ let mut dig = 0;
// calculate new digits while
// - there is no limit and there are digits left
// Decrease the deccumulator one fractional digit at a time
deccum = deccum.fract();
- dig += 1u;
+ dig += 1;
}
// If digits are limited, and that limit has been reached,
};
let (pre_pad, post_pad) = match align {
- rt::AlignLeft => (0u, padding),
- rt::AlignRight | rt::AlignUnknown => (padding, 0u),
+ rt::AlignLeft => (0, padding),
+ rt::AlignRight | rt::AlignUnknown => (padding, 0),
rt::AlignCenter => (padding / 2, (padding + 1) / 2),
};
fn fmt(&self, f: &mut Formatter) -> Result {
try!(write!(f, "("));
let ($(ref $name,)*) = *self;
- let mut n = 0i;
+ let mut n = 0;
$(
if n > 0 {
try!(write!(f, ", "));
///
/// ```
/// use std::fmt::radix;
-/// assert_eq!(format!("{}", radix(55i, 36)), "1j".to_string());
+/// assert_eq!(format!("{}", radix(55, 36)), "1j".to_string());
/// ```
#[unstable = "may be renamed or move to a different module"]
pub fn radix<T>(x: T, base: u8) -> RadixFmt<T, Radix> {
let length = msg.len();
self.length += length;
- let mut needed = 0u;
+ let mut needed = 0;
if self.ntail != 0 {
needed = 8 - self.ntail;
//! translated to the `loop` below.
//!
//! ```
-//! let values = vec![1i, 2, 3];
+//! let values = vec![1, 2, 3];
//!
//! // "Syntactical sugar" taking advantage of an iterator
//! for &x in values.iter() {
/// # Examples
///
/// ```
- /// let a = [1i, 2, 3, 4, 5];
+ /// let a = [1, 2, 3, 4, 5];
/// assert!(a.iter().all(|x| *x > 0));
/// assert!(!a.iter().all(|x| *x > 2));
/// ```
/// ```
/// use std::iter::AdditiveIterator;
///
- /// let a = [1i, 2, 3, 4, 5];
+ /// let a = [1i32, 2, 3, 4, 5];
/// let mut it = a.iter().map(|&x| x);
/// assert!(it.sum() == 15);
/// ```
/// use std::iter::{count, MultiplicativeIterator};
///
/// fn factorial(n: usize) -> usize {
- /// count(1u, 1).take_while(|&i| i <= n).product()
+ /// count(1, 1).take_while(|&i| i <= n).product()
/// }
/// assert!(factorial(0) == 1);
/// assert!(factorial(1) == 1);
/// ```
/// let array = [0, 1, 2, 3, 4];
///
-/// for i in range(0, 5u) {
+/// for i in range(0, 5) {
/// println!("{}", i);
/// assert_eq!(i, array[i]);
/// }
/// let x = true;
/// assert!(x, "x wasn't true!");
///
-/// let a = 3i; let b = 27i;
+/// let a = 3; let b = 27;
/// assert!(a + b == 30, "a = {}, b = {}", a, b);
/// ```
#[macro_export]
/// # Example
///
/// ```
-/// let a = 3i;
-/// let b = 1i + 2i;
+/// let a = 3;
+/// let b = 1 + 2;
/// assert_eq!(a, b);
/// ```
#[macro_export]
/// # Example
///
/// ```
-/// let a = 3i;
-/// let b = 1i + 2i;
+/// let a = 3;
+/// let b = 1 + 2;
/// debug_assert_eq!(a, b);
/// ```
#[macro_export]
/// ```
/// use std::mem;
///
-/// let x = &mut 5i;
-/// let y = &mut 42i;
+/// let x = &mut 5;
+/// let y = &mut 42;
///
/// mem::swap(x, y);
///
-/// assert_eq!(42i, *x);
-/// assert_eq!(5i, *y);
+/// assert_eq!(42, *x);
+/// assert_eq!(5, *y);
/// ```
#[inline]
#[stable]
/// ```
/// use std::cell::RefCell;
///
-/// let x = RefCell::new(1i);
+/// let x = RefCell::new(1);
///
/// let mut mutable_borrow = x.borrow_mut();
/// *mutable_borrow = 1;
/// ```
/// use std::mem;
///
-/// let one = unsafe { mem::transmute_copy(&1i) };
+/// let one = unsafe { mem::transmute_copy(&1) };
///
-/// assert_eq!(1u, one);
+/// assert_eq!(1, one);
/// ```
#[inline]
#[stable]
use option::Option;
#[unstable = "pending integer conventions"]
-pub const RADIX: uint = 2u;
+pub const RADIX: uint = 2;
#[unstable = "pending integer conventions"]
-pub const MANTISSA_DIGITS: uint = 24u;
+pub const MANTISSA_DIGITS: uint = 24;
#[unstable = "pending integer conventions"]
-pub const DIGITS: uint = 6u;
+pub const DIGITS: uint = 6;
#[stable]
pub const EPSILON: f32 = 1.19209290e-07_f32;
// members of `Bounded` and `Float`.
#[unstable = "pending integer conventions"]
-pub const RADIX: uint = 2u;
+pub const RADIX: uint = 2;
-pub const MANTISSA_DIGITS: uint = 53u;
+pub const MANTISSA_DIGITS: uint = 53;
#[unstable = "pending integer conventions"]
-pub const DIGITS: uint = 15u;
+pub const DIGITS: uint = 15;
#[stable]
pub const EPSILON: f64 = 2.2204460492503131e-16_f64;
/// ```rust
/// use std::num::Int;
///
- /// assert_eq!(2i.pow(4), 16);
+ /// assert_eq!(2.pow(4), 16);
/// ```
#[unstable = "pending integer conventions"]
#[inline]
/// ```
/// use std::num;
///
-/// let twenty: f32 = num::cast(0x14i).unwrap();
+/// let twenty: f32 = num::cast(0x14).unwrap();
/// assert_eq!(twenty, 20f32);
/// ```
///
let exp = match exp_info {
Some((c, offset)) => {
let base = match c {
- 'E' | 'e' if radix == 10 => 10u as $T,
- 'P' | 'p' if radix == 16 => 2u as $T,
+ 'E' | 'e' if radix == 10 => 10.0,
+ 'P' | 'p' if radix == 16 => 2.0,
_ => return None,
};
/// # Example
///
/// ```
- /// let mut x = Some(2u);
+ /// let mut x = Some(2);
/// match x.as_mut() {
/// Some(v) => *v = 42,
/// None => {},
/// }
- /// assert_eq!(x, Some(42u));
+ /// assert_eq!(x, Some(42));
/// ```
#[inline]
#[stable]
/// # Example
///
/// ```
- /// let k = 10u;
- /// assert_eq!(Some(4u).unwrap_or_else(|| 2 * k), 4u);
- /// assert_eq!(None.unwrap_or_else(|| 2 * k), 20u);
+ /// let k = 10i32;
+ /// assert_eq!(Some(4).unwrap_or_else(|| 2 * k), 4);
+ /// assert_eq!(None.unwrap_or_else(|| 2 * k), 20);
/// ```
#[inline]
#[stable]
///
/// ```
/// let x = Some("foo");
- /// assert_eq!(x.map_or(42u, |v| v.len()), 3u);
+ /// assert_eq!(x.map_or(42, |v| v.len()), 3);
///
/// let x: Option<&str> = None;
- /// assert_eq!(x.map_or(42u, |v| v.len()), 42u);
+ /// assert_eq!(x.map_or(42, |v| v.len()), 42);
/// ```
#[inline]
#[stable]
/// # Example
///
/// ```
- /// let k = 21u;
+ /// let k = 21;
///
/// let x = Some("foo");
- /// assert_eq!(x.map_or_else(|| 2 * k, |v| v.len()), 3u);
+ /// assert_eq!(x.map_or_else(|| 2 * k, |v| v.len()), 3);
///
/// let x: Option<&str> = None;
- /// assert_eq!(x.map_or_else(|| 2 * k, |v| v.len()), 42u);
+ /// assert_eq!(x.map_or_else(|| 2 * k, |v| v.len()), 42);
/// ```
#[inline]
#[stable]
///
/// ```
/// let x = Some("foo");
- /// assert_eq!(x.ok_or(0i), Ok("foo"));
+ /// assert_eq!(x.ok_or(0), Ok("foo"));
///
/// let x: Option<&str> = None;
- /// assert_eq!(x.ok_or(0i), Err(0i));
+ /// assert_eq!(x.ok_or(0), Err(0));
/// ```
#[inline]
#[unstable]
///
/// ```
/// let x = Some("foo");
- /// assert_eq!(x.ok_or_else(|| 0i), Ok("foo"));
+ /// assert_eq!(x.ok_or_else(|| 0), Ok("foo"));
///
/// let x: Option<&str> = None;
- /// assert_eq!(x.ok_or_else(|| 0i), Err(0i));
+ /// assert_eq!(x.ok_or_else(|| 0), Err(0));
/// ```
#[inline]
#[unstable]
/// # Example
///
/// ```
- /// let x = Some(4u);
+ /// let x = Some(4);
/// assert_eq!(x.iter().next(), Some(&4));
///
/// let x: Option<uint> = None;
/// # Example
///
/// ```
- /// let mut x = Some(4u);
+ /// let mut x = Some(4);
/// match x.iter_mut().next() {
- /// Some(&mut ref mut v) => *v = 42u,
+ /// Some(&mut ref mut v) => *v = 42,
/// None => {},
/// }
/// assert_eq!(x, Some(42));
/// # Example
///
/// ```
- /// let x = Some(2u);
+ /// let x = Some(2);
/// let y: Option<&str> = None;
/// assert_eq!(x.and(y), None);
///
/// let y = Some("foo");
/// assert_eq!(x.and(y), None);
///
- /// let x = Some(2u);
+ /// let x = Some(2);
/// let y = Some("foo");
/// assert_eq!(x.and(y), Some("foo"));
///
/// # Example
///
/// ```
- /// let x = Some(2u);
+ /// let x = Some(2);
/// let y = None;
- /// assert_eq!(x.or(y), Some(2u));
+ /// assert_eq!(x.or(y), Some(2));
///
/// let x = None;
- /// let y = Some(100u);
- /// assert_eq!(x.or(y), Some(100u));
+ /// let y = Some(100);
+ /// assert_eq!(x.or(y), Some(100));
///
- /// let x = Some(2u);
- /// let y = Some(100u);
- /// assert_eq!(x.or(y), Some(2u));
+ /// let x = Some(2);
+ /// let y = Some(100);
+ /// assert_eq!(x.or(y), Some(2));
///
/// let x: Option<uint> = None;
/// let y = None;
/// # Example
///
/// ```
- /// let mut x = Some(2u);
+ /// let mut x = Some(2);
/// x.take();
/// assert_eq!(x, None);
///
/// let good_year = good_year_from_input.parse().unwrap_or_default();
/// let bad_year = bad_year_from_input.parse().unwrap_or_default();
///
- /// assert_eq!(1909i, good_year);
- /// assert_eq!(0i, bad_year);
+ /// assert_eq!(1909, good_year);
+ /// assert_eq!(0, bad_year);
/// ```
#[inline]
#[stable]
/// ```rust
/// use std::uint;
///
- /// let v = vec!(1u, 2u);
+ /// let v = vec!(1, 2);
/// let res: Option<Vec<uint>> = v.iter().map(|&x: &uint|
/// if x == uint::MAX { None }
/// else { Some(x + 1) }
/// ).collect();
- /// assert!(res == Some(vec!(2u, 3u)));
+ /// assert!(res == Some(vec!(2, 3)));
/// ```
#[inline]
#[stable]
use self::Result::{Ok, Err};
use clone::Clone;
-use fmt::Display;
+use fmt::Debug;
use iter::{Iterator, IteratorExt, DoubleEndedIterator, FromIterator, ExactSizeIterator};
use ops::{FnMut, FnOnce};
use option::Option::{self, None, Some};
/// ```
/// fn stringify(x: uint) -> String { format!("error code: {}", x) }
///
- /// let x: Result<uint, uint> = Ok(2u);
- /// assert_eq!(x.map_err(stringify), Ok(2u));
+ /// let x: Result<uint, uint> = Ok(2);
+ /// assert_eq!(x.map_err(stringify), Ok(2));
///
/// let x: Result<uint, uint> = Err(13);
/// assert_eq!(x.map_err(stringify), Err("error code: 13".to_string()));
/// ```
/// let x: Result<uint, &str> = Ok(5);
/// let v: Vec<uint> = x.into_iter().collect();
- /// assert_eq!(v, vec![5u]);
+ /// assert_eq!(v, vec![5]);
///
/// let x: Result<uint, &str> = Err("nothing!");
/// let v: Vec<uint> = x.into_iter().collect();
/// # Example
///
/// ```
- /// let optb = 2u;
- /// let x: Result<uint, &str> = Ok(9u);
- /// assert_eq!(x.unwrap_or(optb), 9u);
+ /// let optb = 2;
+ /// let x: Result<uint, &str> = Ok(9);
+ /// assert_eq!(x.unwrap_or(optb), 9);
///
/// let x: Result<uint, &str> = Err("error");
/// assert_eq!(x.unwrap_or(optb), optb);
/// ```
/// fn count(x: &str) -> uint { x.len() }
///
- /// assert_eq!(Ok(2u).unwrap_or_else(count), 2u);
- /// assert_eq!(Err("foo").unwrap_or_else(count), 3u);
+ /// assert_eq!(Ok(2).unwrap_or_else(count), 2);
+ /// assert_eq!(Err("foo").unwrap_or_else(count), 3);
/// ```
#[inline]
#[stable]
}
#[stable]
-impl<T, E: Display> Result<T, E> {
+impl<T, E: Debug> Result<T, E> {
/// Unwraps a result, yielding the content of an `Ok`.
///
/// # Panics
/// # Example
///
/// ```
- /// let x: Result<uint, &str> = Ok(2u);
- /// assert_eq!(x.unwrap(), 2u);
+ /// let x: Result<uint, &str> = Ok(2);
+ /// assert_eq!(x.unwrap(), 2);
/// ```
///
/// ```{.should_fail}
match self {
Ok(t) => t,
Err(e) =>
- panic!("called `Result::unwrap()` on an `Err` value: {}", e)
+ panic!("called `Result::unwrap()` on an `Err` value: {:?}", e)
}
}
}
#[stable]
-impl<T: Display, E> Result<T, E> {
+impl<T: Debug, E> Result<T, E> {
/// Unwraps a result, yielding the content of an `Err`.
///
/// # Panics
/// # Example
///
/// ```{.should_fail}
- /// let x: Result<uint, &str> = Ok(2u);
+ /// let x: Result<uint, &str> = Ok(2);
/// x.unwrap_err(); // panics with `2`
/// ```
///
pub fn unwrap_err(self) -> E {
match self {
Ok(t) =>
- panic!("called `Result::unwrap_err()` on an `Ok` value: {}", t),
+ panic!("called `Result::unwrap_err()` on an `Ok` value: {:?}", t),
Err(e) => e
}
}
/// ```rust
/// use std::uint;
///
- /// let v = vec!(1u, 2u);
+ /// let v = vec!(1, 2);
/// let res: Result<Vec<uint>, &'static str> = v.iter().map(|&x: &uint|
/// if x == uint::MAX { Err("Overflow!") }
/// else { Ok(x + 1) }
/// ).collect();
- /// assert!(res == Ok(vec!(2u, 3u)));
+ /// assert!(res == Ok(vec!(2, 3)));
/// ```
#[inline]
fn from_iter<I: Iterator<Item=Result<A, E>>>(iter: I) -> Result<V, E> {
self.ptr = transmute(self.ptr as uint + 1);
// Use a non-null pointer value
- Some(transmute(1u))
+ Some(&mut *(1 as *mut _))
} else {
let old = self.ptr;
self.ptr = self.ptr.offset(1);
self.end = transmute(self.end as uint - 1);
// Use a non-null pointer value
- Some(transmute(1u))
+ Some(&mut *(1 as *mut _))
} else {
self.end = self.end.offset(-1);
if index < self.indexable() {
if mem::size_of::<T>() == 0 {
// Use a non-null pointer value
- Some(transmute(1u))
+ Some(&mut *(1 as *mut _))
} else {
Some(transmute(self.ptr.offset(index as int)))
}
(0, Some(0))
} else {
let x = self.v.len() - self.size;
- (x.saturating_add(1), x.checked_add(1u))
+ (x.saturating_add(1), x.checked_add(1))
}
}
}
#[deprecated = "use std::ffi::c_str_to_bytes + str::from_utf8"]
pub unsafe fn from_c_str(s: *const i8) -> &'static str {
let s = s as *const u8;
- let mut len = 0u;
+ let mut len = 0;
while *s.offset(len as int) != 0 {
- len += 1u;
+ len += 1;
}
let v: &'static [u8] = ::mem::transmute(Slice { data: s, len: len });
from_utf8(v).ok().expect("from_c_str passed invalid utf-8 data")
}
}
+/// Reads the next code point out of a byte iterator (assuming a
+/// UTF-8-like encoding).
+#[unstable]
+pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
+ // Decode UTF-8
+ let x = match bytes.next() {
+ None => return None,
+ Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
+ Some(&next_byte) => next_byte,
+ };
+
+ // Multibyte case follows
+ // Decode from a byte combination out of: [[[x y] z] w]
+ // NOTE: Performance is sensitive to the exact formulation here
+ let init = utf8_first_byte!(x, 2);
+ let y = unwrap_or_0(bytes.next());
+ let mut ch = utf8_acc_cont_byte!(init, y);
+ if x >= 0xE0 {
+ // [[x y z] w] case
+ // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
+ let z = unwrap_or_0(bytes.next());
+ let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
+ ch = init << 12 | y_z;
+ if x >= 0xF0 {
+ // [x y z w] case
+ // use only the lower 3 bits of `init`
+ let w = unwrap_or_0(bytes.next());
+ ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
+ }
+ }
+
+ Some(ch)
+}
+
#[stable]
impl<'a> Iterator for Chars<'a> {
type Item = char;
#[inline]
fn next(&mut self) -> Option<char> {
- // Decode UTF-8, using the valid UTF-8 invariant
- let x = match self.iter.next() {
- None => return None,
- Some(&next_byte) if next_byte < 128 => return Some(next_byte as char),
- Some(&next_byte) => next_byte,
- };
-
- // Multibyte case follows
- // Decode from a byte combination out of: [[[x y] z] w]
- // NOTE: Performance is sensitive to the exact formulation here
- let init = utf8_first_byte!(x, 2);
- let y = unwrap_or_0(self.iter.next());
- let mut ch = utf8_acc_cont_byte!(init, y);
- if x >= 0xE0 {
- // [[x y z] w] case
- // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
- let z = unwrap_or_0(self.iter.next());
- let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
- ch = init << 12 | y_z;
- if x >= 0xF0 {
- // [x y z w] case
- // use only the lower 3 bits of `init`
- let w = unwrap_or_0(self.iter.next());
- ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
+ next_code_point(&mut self.iter).map(|ch| {
+ // str invariant says `ch` is a valid Unicode Scalar Value
+ unsafe {
+ mem::transmute(ch)
}
- }
-
- // str invariant says `ch` is a valid Unicode Scalar Value
- unsafe {
- Some(mem::transmute(ch))
- }
+ })
}
#[inline]
None => "",
Some(last) => {
let next = self.char_range_at(last).next;
- unsafe { self.slice_unchecked(0u, next) }
+ unsafe { self.slice_unchecked(0, next) }
}
}
}
#[inline]
fn char_range_at(&self, i: uint) -> CharRange {
- if self.as_bytes()[i] < 128u8 {
- return CharRange {ch: self.as_bytes()[i] as char, next: i + 1 };
- }
-
- // Multibyte case is a fn to allow char_range_at to inline cleanly
- fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
- let mut val = s.as_bytes()[i] as u32;
- let w = UTF8_CHAR_WIDTH[val as uint] as uint;
- assert!((w != 0));
-
- val = utf8_first_byte!(val, w);
- val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
- if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
- if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
-
- return CharRange {ch: unsafe { mem::transmute(val) }, next: i + w};
- }
-
- return multibyte_char_range_at(self, i);
+ let (c, n) = char_range_at_raw(self.as_bytes(), i);
+ CharRange { ch: unsafe { mem::transmute(c) }, next: n }
}
#[inline]
fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
// while there is a previous byte == 10......
while i > 0 && s.as_bytes()[i] & !CONT_MASK == TAG_CONT_U8 {
- i -= 1u;
+ i -= 1;
}
let mut val = s.as_bytes()[i] as u32;
if self.is_empty() {
None
} else {
- let CharRange {ch, next} = self.char_range_at(0u);
+ let CharRange {ch, next} = self.char_range_at(0);
let next_s = unsafe { self.slice_unchecked(next, self.len()) };
Some((ch, next_s))
}
fn parse<T: FromStr>(&self) -> Option<T> { FromStr::from_str(self) }
}
+/// Pluck a code point out of a UTF-8-like byte slice and return the
+/// index of the next code point.
+#[inline]
+#[unstable]
+pub fn char_range_at_raw(bytes: &[u8], i: uint) -> (u32, usize) {
+ if bytes[i] < 128u8 {
+ return (bytes[i] as u32, i + 1);
+ }
+
+ // Multibyte case is a fn to allow char_range_at to inline cleanly
+ fn multibyte_char_range_at(bytes: &[u8], i: uint) -> (u32, usize) {
+ let mut val = bytes[i] as u32;
+ let w = UTF8_CHAR_WIDTH[val as uint] as uint;
+ assert!((w != 0));
+
+ val = utf8_first_byte!(val, w);
+ val = utf8_acc_cont_byte!(val, bytes[i + 1]);
+ if w > 2 { val = utf8_acc_cont_byte!(val, bytes[i + 2]); }
+ if w > 3 { val = utf8_acc_cont_byte!(val, bytes[i + 3]); }
+
+ return (val, i + w);
+ }
+
+ multibyte_char_range_at(bytes, i)
+}
+
#[stable]
impl<'a> Default for &'a str {
#[stable]
fn test_format_flags() {
// No residual flags left by pointer formatting
let p = "".as_ptr();
- assert_eq!(format!("{:p} {:x}", p, 16u), format!("{:p} 10", p));
+ assert_eq!(format!("{:p} {:x}", p, 16), format!("{:p} 10", p));
}
// Formatting integers should select the right implementation based off
// the type of the argument. Also, hex/octal/binary should be defined
// for integers, but they shouldn't emit the negative sign.
- assert!(format!("{}", 1i) == "1");
+ assert!(format!("{}", 1is) == "1");
assert!(format!("{}", 1i8) == "1");
assert!(format!("{}", 1i16) == "1");
assert!(format!("{}", 1i32) == "1");
assert!(format!("{}", 1i64) == "1");
- assert!(format!("{}", -1i) == "-1");
+ assert!(format!("{}", -1is) == "-1");
assert!(format!("{}", -1i8) == "-1");
assert!(format!("{}", -1i16) == "-1");
assert!(format!("{}", -1i32) == "-1");
assert!(format!("{}", -1i64) == "-1");
- assert!(format!("{:?}", 1i) == "1");
+ assert!(format!("{:?}", 1is) == "1");
assert!(format!("{:?}", 1i8) == "1");
assert!(format!("{:?}", 1i16) == "1");
assert!(format!("{:?}", 1i32) == "1");
assert!(format!("{:?}", 1i64) == "1");
- assert!(format!("{:b}", 1i) == "1");
+ assert!(format!("{:b}", 1is) == "1");
assert!(format!("{:b}", 1i8) == "1");
assert!(format!("{:b}", 1i16) == "1");
assert!(format!("{:b}", 1i32) == "1");
assert!(format!("{:b}", 1i64) == "1");
- assert!(format!("{:x}", 1i) == "1");
+ assert!(format!("{:x}", 1is) == "1");
assert!(format!("{:x}", 1i8) == "1");
assert!(format!("{:x}", 1i16) == "1");
assert!(format!("{:x}", 1i32) == "1");
assert!(format!("{:x}", 1i64) == "1");
- assert!(format!("{:X}", 1i) == "1");
+ assert!(format!("{:X}", 1is) == "1");
assert!(format!("{:X}", 1i8) == "1");
assert!(format!("{:X}", 1i16) == "1");
assert!(format!("{:X}", 1i32) == "1");
assert!(format!("{:X}", 1i64) == "1");
- assert!(format!("{:o}", 1i) == "1");
+ assert!(format!("{:o}", 1is) == "1");
assert!(format!("{:o}", 1i8) == "1");
assert!(format!("{:o}", 1i16) == "1");
assert!(format!("{:o}", 1i32) == "1");
assert!(format!("{:o}", 1i64) == "1");
- assert!(format!("{}", 1u) == "1");
+ assert!(format!("{}", 1us) == "1");
assert!(format!("{}", 1u8) == "1");
assert!(format!("{}", 1u16) == "1");
assert!(format!("{}", 1u32) == "1");
assert!(format!("{}", 1u64) == "1");
- assert!(format!("{:?}", 1u) == "1");
+ assert!(format!("{:?}", 1us) == "1");
assert!(format!("{:?}", 1u8) == "1");
assert!(format!("{:?}", 1u16) == "1");
assert!(format!("{:?}", 1u32) == "1");
assert!(format!("{:?}", 1u64) == "1");
- assert!(format!("{:b}", 1u) == "1");
+ assert!(format!("{:b}", 1us) == "1");
assert!(format!("{:b}", 1u8) == "1");
assert!(format!("{:b}", 1u16) == "1");
assert!(format!("{:b}", 1u32) == "1");
assert!(format!("{:b}", 1u64) == "1");
- assert!(format!("{:x}", 1u) == "1");
+ assert!(format!("{:x}", 1us) == "1");
assert!(format!("{:x}", 1u8) == "1");
assert!(format!("{:x}", 1u16) == "1");
assert!(format!("{:x}", 1u32) == "1");
assert!(format!("{:x}", 1u64) == "1");
- assert!(format!("{:X}", 1u) == "1");
+ assert!(format!("{:X}", 1us) == "1");
assert!(format!("{:X}", 1u8) == "1");
assert!(format!("{:X}", 1u16) == "1");
assert!(format!("{:X}", 1u32) == "1");
assert!(format!("{:X}", 1u64) == "1");
- assert!(format!("{:o}", 1u) == "1");
+ assert!(format!("{:o}", 1us) == "1");
assert!(format!("{:o}", 1u8) == "1");
assert!(format!("{:o}", 1u16) == "1");
assert!(format!("{:o}", 1u32) == "1");
assert!(format!("{:o}", 1u64) == "1");
// Test a larger number
- assert!(format!("{:b}", 55i) == "110111");
- assert!(format!("{:o}", 55i) == "67");
- assert!(format!("{}", 55i) == "55");
- assert!(format!("{:x}", 55i) == "37");
- assert!(format!("{:X}", 55i) == "37");
+ assert!(format!("{:b}", 55) == "110111");
+ assert!(format!("{:o}", 55) == "67");
+ assert!(format!("{}", 55) == "55");
+ assert!(format!("{:x}", 55) == "37");
+ assert!(format!("{:X}", 55) == "37");
}
#[test]
fn test_format_int_zero() {
- assert!(format!("{}", 0i) == "0");
- assert!(format!("{:?}", 0i) == "0");
- assert!(format!("{:b}", 0i) == "0");
- assert!(format!("{:o}", 0i) == "0");
- assert!(format!("{:x}", 0i) == "0");
- assert!(format!("{:X}", 0i) == "0");
-
- assert!(format!("{}", 0u) == "0");
- assert!(format!("{:?}", 0u) == "0");
- assert!(format!("{:b}", 0u) == "0");
- assert!(format!("{:o}", 0u) == "0");
- assert!(format!("{:x}", 0u) == "0");
- assert!(format!("{:X}", 0u) == "0");
+ assert!(format!("{}", 0) == "0");
+ assert!(format!("{:?}", 0) == "0");
+ assert!(format!("{:b}", 0) == "0");
+ assert!(format!("{:o}", 0) == "0");
+ assert!(format!("{:x}", 0) == "0");
+ assert!(format!("{:X}", 0) == "0");
+
+ assert!(format!("{}", 0u32) == "0");
+ assert!(format!("{:?}", 0u32) == "0");
+ assert!(format!("{:b}", 0u32) == "0");
+ assert!(format!("{:o}", 0u32) == "0");
+ assert!(format!("{:x}", 0u32) == "0");
+ assert!(format!("{:X}", 0u32) == "0");
}
#[test]
fn test_format_int_flags() {
- assert!(format!("{:3}", 1i) == " 1");
- assert!(format!("{:>3}", 1i) == " 1");
- assert!(format!("{:>+3}", 1i) == " +1");
- assert!(format!("{:<3}", 1i) == "1 ");
- assert!(format!("{:#}", 1i) == "1");
- assert!(format!("{:#x}", 10i) == "0xa");
- assert!(format!("{:#X}", 10i) == "0xA");
- assert!(format!("{:#5x}", 10i) == " 0xa");
- assert!(format!("{:#o}", 10i) == "0o12");
- assert!(format!("{:08x}", 10i) == "0000000a");
- assert!(format!("{:8x}", 10i) == " a");
- assert!(format!("{:<8x}", 10i) == "a ");
- assert!(format!("{:>8x}", 10i) == " a");
- assert!(format!("{:#08x}", 10i) == "0x00000a");
- assert!(format!("{:08}", -10i) == "-0000010");
+ assert!(format!("{:3}", 1) == " 1");
+ assert!(format!("{:>3}", 1) == " 1");
+ assert!(format!("{:>+3}", 1) == " +1");
+ assert!(format!("{:<3}", 1) == "1 ");
+ assert!(format!("{:#}", 1) == "1");
+ assert!(format!("{:#x}", 10) == "0xa");
+ assert!(format!("{:#X}", 10) == "0xA");
+ assert!(format!("{:#5x}", 10) == " 0xa");
+ assert!(format!("{:#o}", 10) == "0o12");
+ assert!(format!("{:08x}", 10) == "0000000a");
+ assert!(format!("{:8x}", 10) == " a");
+ assert!(format!("{:<8x}", 10) == "a ");
+ assert!(format!("{:>8x}", 10) == " a");
+ assert!(format!("{:#08x}", 10) == "0x00000a");
+ assert!(format!("{:08}", -10) == "-0000010");
assert!(format!("{:x}", -1u8) == "ff");
assert!(format!("{:X}", -1u8) == "FF");
assert!(format!("{:b}", -1u8) == "11111111");
#[test]
fn test_format_int_sign_padding() {
- assert!(format!("{:+5}", 1i) == " +1");
- assert!(format!("{:+5}", -1i) == " -1");
- assert!(format!("{:05}", 1i) == "00001");
- assert!(format!("{:05}", -1i) == "-0001");
- assert!(format!("{:+05}", 1i) == "+0001");
- assert!(format!("{:+05}", -1i) == "-0001");
+ assert!(format!("{:+5}", 1) == " +1");
+ assert!(format!("{:+5}", -1) == " -1");
+ assert!(format!("{:05}", 1) == "00001");
+ assert!(format!("{:05}", -1) == "-0001");
+ assert!(format!("{:+05}", 1) == "+0001");
+ assert!(format!("{:+05}", -1) == "-0001");
}
#[test]
#[test]
fn test_format_radix() {
- assert!(format!("{:04}", radix(3i, 2)) == "0011");
- assert!(format!("{}", radix(55i, 36)) == "1j");
+ assert!(format!("{:04}", radix(3, 2)) == "0011");
+ assert!(format!("{}", radix(55, 36)) == "1j");
}
#[test]
#[should_fail]
fn test_radix_base_too_large() {
- let _ = radix(55i, 37);
+ let _ = radix(55, 37);
}
-mod uint {
+mod u32 {
use test::Bencher;
use core::fmt::radix;
use std::rand::{weak_rng, Rng};
+ use std::io::util::NullWriter;
#[bench]
fn format_bin(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{:b}", rng.gen::<uint>()); })
+ b.iter(|| { write!(&mut NullWriter, "{:b}", rng.gen::<u32>()) })
}
#[bench]
fn format_oct(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{:o}", rng.gen::<uint>()); })
+ b.iter(|| { write!(&mut NullWriter, "{:o}", rng.gen::<u32>()) })
}
#[bench]
fn format_dec(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{}", rng.gen::<uint>()); })
+ b.iter(|| { write!(&mut NullWriter, "{}", rng.gen::<u32>()) })
}
#[bench]
fn format_hex(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{:x}", rng.gen::<uint>()); })
+ b.iter(|| { write!(&mut NullWriter, "{:x}", rng.gen::<u32>()) })
}
#[bench]
fn format_show(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{:?}", rng.gen::<uint>()); })
+ b.iter(|| { write!(&mut NullWriter, "{:?}", rng.gen::<u32>()) })
}
#[bench]
fn format_base_36(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{}", radix(rng.gen::<uint>(), 36)); })
+ b.iter(|| { write!(&mut NullWriter, "{}", radix(rng.gen::<u32>(), 36)) })
}
}
-mod int {
+mod i32 {
use test::Bencher;
use core::fmt::radix;
use std::rand::{weak_rng, Rng};
+ use std::io::util::NullWriter;
#[bench]
fn format_bin(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{:b}", rng.gen::<int>()); })
+ b.iter(|| { write!(&mut NullWriter, "{:b}", rng.gen::<i32>()) })
}
#[bench]
fn format_oct(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{:o}", rng.gen::<int>()); })
+ b.iter(|| { write!(&mut NullWriter, "{:o}", rng.gen::<i32>()) })
}
#[bench]
fn format_dec(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{}", rng.gen::<int>()); })
+ b.iter(|| { write!(&mut NullWriter, "{}", rng.gen::<i32>()) })
}
#[bench]
fn format_hex(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{:x}", rng.gen::<int>()); })
+ b.iter(|| { write!(&mut NullWriter, "{:x}", rng.gen::<i32>()) })
}
#[bench]
fn format_show(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{:?}", rng.gen::<int>()); })
+ b.iter(|| { write!(&mut NullWriter, "{:?}", rng.gen::<i32>()) })
}
#[bench]
fn format_base_36(b: &mut Bencher) {
let mut rng = weak_rng();
- b.iter(|| { format!("{}", radix(rng.gen::<int>(), 36)); })
+ b.iter(|| { write!(&mut NullWriter, "{}", radix(rng.gen::<i32>(), 36)) })
}
}
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-use regex::Regex;
use std::ascii::AsciiExt;
use std::cmp;
///
/// Valid log levels are 0-255, with the most likely ones being 1-4 (defined in
/// std::). Also supports string log levels of error, warn, info, and debug
-pub fn parse_logging_spec(spec: &str) -> (Vec<LogDirective>, Option<Regex>) {
+pub fn parse_logging_spec(spec: &str) -> (Vec<LogDirective>, Option<String>) {
let mut dirs = Vec::new();
let mut parts = spec.split('/');
});
}});
- let filter = filter.map_or(None, |filter| {
- match Regex::new(filter) {
- Ok(re) => Some(re),
- Err(e) => {
- println!("warning: invalid regex filter - {:?}", e);
- None
- }
- }
- });
-
- return (dirs, filter);
+ (dirs, filter.map(|s| s.to_string()))
}
#[cfg(test)]
//!
//! # Filtering results
//!
-//! A RUST_LOG directive may include a regex filter. The syntax is to append `/`
-//! followed by a regex. Each message is checked against the regex, and is only
-//! logged if it matches. Note that the matching is done after formatting the log
-//! string but before adding any logging meta-data. There is a single filter for all
-//! modules.
+//! A RUST_LOG directive may include a string filter. The syntax is to append
+//! `/` followed by a string. Each message is checked against the string and is
+//! only logged if it contains the string. Note that the matching is done after
+//! formatting the log string but before adding any logging meta-data. There is
+//! a single filter for all modules.
//!
//! Some examples:
//!
#![allow(unstable)]
#![deny(missing_docs)]
-extern crate regex;
-
use std::cell::RefCell;
use std::fmt;
use std::io::LineBufferedWriter;
use std::slice;
use std::sync::{Once, ONCE_INIT};
-use regex::Regex;
-
use directive::LOG_LEVEL_NAMES;
#[macro_use]
static mut DIRECTIVES: *const Vec<directive::LogDirective> =
0 as *const Vec<directive::LogDirective>;
-/// Optional regex filter.
-static mut FILTER: *const Regex = 0 as *const _;
+/// Optional filter.
+static mut FILTER: *const String = 0 as *const _;
/// Debug log level
pub const DEBUG: u32 = 4;
// Test the literal string from args against the current filter, if there
// is one.
match unsafe { FILTER.as_ref() } {
- Some(filter) if !filter.is_match(&args.to_string()[]) => return,
+ Some(filter) if !args.to_string().contains(&filter[]) => return,
_ => {}
}
DIRECTIVES = ptr::null();
if !FILTER.is_null() {
- let _filter: Box<Regex> = mem::transmute(FILTER);
- FILTER = ptr::null();
+ let _filter: Box<String> = mem::transmute(FILTER);
+ FILTER = 0 as *const _;
}
});
}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// Enable this to squash warnings due to exporting pieces of the representation
-// for use with the regex! macro. See lib.rs for explanation.
-
-pub use self::Inst::*;
-
-use std::cmp;
-use std::iter::repeat;
-use parse;
-use parse::{
- Flags, FLAG_EMPTY,
- Nothing, Literal, Dot, AstClass, Begin, End, WordBoundary, Capture, Cat, Alt,
- Rep,
- ZeroOne, ZeroMore, OneMore,
-};
-
-type InstIdx = uint;
-
-#[derive(Show, Clone)]
-pub enum Inst {
- // When a Match instruction is executed, the current thread is successful.
- Match,
-
- // The OneChar instruction matches a literal character.
- // The flags indicate whether to do a case insensitive match.
- OneChar(char, Flags),
-
- // The CharClass instruction tries to match one input character against
- // the range of characters given.
- // The flags indicate whether to do a case insensitive match and whether
- // the character class is negated or not.
- CharClass(Vec<(char, char)>, Flags),
-
- // Matches any character except new lines.
- // The flags indicate whether to include the '\n' character.
- Any(Flags),
-
- // Matches the beginning of the string, consumes no characters.
- // The flags indicate whether it matches if the preceding character
- // is a new line.
- EmptyBegin(Flags),
-
- // Matches the end of the string, consumes no characters.
- // The flags indicate whether it matches if the proceeding character
- // is a new line.
- EmptyEnd(Flags),
-
- // Matches a word boundary (\w on one side and \W \A or \z on the other),
- // and consumes no character.
- // The flags indicate whether this matches a word boundary or something
- // that isn't a word boundary.
- EmptyWordBoundary(Flags),
-
- // Saves the current position in the input string to the Nth save slot.
- Save(uint),
-
- // Jumps to the instruction at the index given.
- Jump(InstIdx),
-
- // Jumps to the instruction at the first index given. If that leads to
- // a panic state, then the instruction at the second index given is
- // tried.
- Split(InstIdx, InstIdx),
-}
-
-/// Program represents a compiled regular expression. Once an expression is
-/// compiled, its representation is immutable and will never change.
-///
-/// All of the data in a compiled expression is wrapped in "MaybeStatic" or
-/// "MaybeOwned" types so that a `Program` can be represented as static data.
-/// (This makes it convenient and efficient for use with the `regex!` macro.)
-#[derive(Clone)]
-pub struct Program {
- /// A sequence of instructions.
- pub insts: Vec<Inst>,
- /// If the regular expression requires a literal prefix in order to have a
- /// match, that prefix is stored here. (It's used in the VM to implement
- /// an optimization.)
- pub prefix: String,
-}
-
-impl Program {
- /// Compiles a Regex given its AST.
- pub fn new(ast: parse::Ast) -> (Program, Vec<Option<String>>) {
- let mut c = Compiler {
- insts: Vec::with_capacity(100),
- names: Vec::with_capacity(10),
- };
-
- c.insts.push(Save(0));
- c.compile(ast);
- c.insts.push(Save(1));
- c.insts.push(Match);
-
- // Try to discover a literal string prefix.
- // This is a bit hacky since we have to skip over the initial
- // 'Save' instruction.
- let mut pre = String::with_capacity(5);
- for inst in c.insts[1..].iter() {
- match *inst {
- OneChar(c, FLAG_EMPTY) => pre.push(c),
- _ => break
- }
- }
-
- let Compiler { insts, names } = c;
- let prog = Program {
- insts: insts,
- prefix: pre,
- };
- (prog, names)
- }
-
- /// Returns the total number of capture groups in the regular expression.
- /// This includes the zeroth capture.
- pub fn num_captures(&self) -> uint {
- let mut n = 0;
- for inst in self.insts.iter() {
- match *inst {
- Save(c) => n = cmp::max(n, c+1),
- _ => {}
- }
- }
- // There's exactly 2 Save slots for every capture.
- n / 2
- }
-}
-
-struct Compiler<'r> {
- insts: Vec<Inst>,
- names: Vec<Option<String>>,
-}
-
-// The compiler implemented here is extremely simple. Most of the complexity
-// in this crate is in the parser or the VM.
-// The only tricky thing here is patching jump/split instructions to point to
-// the right instruction.
-impl<'r> Compiler<'r> {
- fn compile(&mut self, ast: parse::Ast) {
- match ast {
- Nothing => {},
- Literal(c, flags) => self.push(OneChar(c, flags)),
- Dot(nl) => self.push(Any(nl)),
- AstClass(ranges, flags) =>
- self.push(CharClass(ranges, flags)),
- Begin(flags) => self.push(EmptyBegin(flags)),
- End(flags) => self.push(EmptyEnd(flags)),
- WordBoundary(flags) => self.push(EmptyWordBoundary(flags)),
- Capture(cap, name, x) => {
- let len = self.names.len();
- if cap >= len {
- self.names.extend(repeat(None).take(10 + cap - len))
- }
- self.names[cap] = name;
-
- self.push(Save(2 * cap));
- self.compile(*x);
- self.push(Save(2 * cap + 1));
- }
- Cat(xs) => {
- for x in xs.into_iter() {
- self.compile(x)
- }
- }
- Alt(x, y) => {
- let split = self.empty_split(); // push: split 0, 0
- let j1 = self.insts.len();
- self.compile(*x); // push: insts for x
- let jmp = self.empty_jump(); // push: jmp 0
- let j2 = self.insts.len();
- self.compile(*y); // push: insts for y
- let j3 = self.insts.len();
-
- self.set_split(split, j1, j2); // split 0, 0 -> split j1, j2
- self.set_jump(jmp, j3); // jmp 0 -> jmp j3
- }
- Rep(x, ZeroOne, g) => {
- let split = self.empty_split();
- let j1 = self.insts.len();
- self.compile(*x);
- let j2 = self.insts.len();
-
- if g.is_greedy() {
- self.set_split(split, j1, j2);
- } else {
- self.set_split(split, j2, j1);
- }
- }
- Rep(x, ZeroMore, g) => {
- let j1 = self.insts.len();
- let split = self.empty_split();
- let j2 = self.insts.len();
- self.compile(*x);
- let jmp = self.empty_jump();
- let j3 = self.insts.len();
-
- self.set_jump(jmp, j1);
- if g.is_greedy() {
- self.set_split(split, j2, j3);
- } else {
- self.set_split(split, j3, j2);
- }
- }
- Rep(x, OneMore, g) => {
- let j1 = self.insts.len();
- self.compile(*x);
- let split = self.empty_split();
- let j2 = self.insts.len();
-
- if g.is_greedy() {
- self.set_split(split, j1, j2);
- } else {
- self.set_split(split, j2, j1);
- }
- }
- }
- }
-
- /// Appends the given instruction to the program.
- #[inline]
- fn push(&mut self, x: Inst) {
- self.insts.push(x)
- }
-
- /// Appends an *empty* `Split` instruction to the program and returns
- /// the index of that instruction. (The index can then be used to "patch"
- /// the actual locations of the split in later.)
- #[inline]
- fn empty_split(&mut self) -> InstIdx {
- self.insts.push(Split(0, 0));
- self.insts.len() - 1
- }
-
- /// Sets the left and right locations of a `Split` instruction at index
- /// `i` to `pc1` and `pc2`, respectively.
- /// If the instruction at index `i` isn't a `Split` instruction, then
- /// `panic!` is called.
- #[inline]
- fn set_split(&mut self, i: InstIdx, pc1: InstIdx, pc2: InstIdx) {
- let split = &mut self.insts[i];
- match *split {
- Split(_, _) => *split = Split(pc1, pc2),
- _ => panic!("BUG: Invalid split index."),
- }
- }
-
- /// Appends an *empty* `Jump` instruction to the program and returns the
- /// index of that instruction.
- #[inline]
- fn empty_jump(&mut self) -> InstIdx {
- self.insts.push(Jump(0));
- self.insts.len() - 1
- }
-
- /// Sets the location of a `Jump` instruction at index `i` to `pc`.
- /// If the instruction at index `i` isn't a `Jump` instruction, then
- /// `panic!` is called.
- #[inline]
- fn set_jump(&mut self, i: InstIdx, pc: InstIdx) {
- let jmp = &mut self.insts[i];
- match *jmp {
- Jump(_) => *jmp = Jump(pc),
- _ => panic!("BUG: Invalid jump index."),
- }
- }
-}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
-
-//! Regular expressions implemented in Rust
-//!
-//! For official documentation, see the rust-lang/regex crate
-#![crate_name = "regex"]
-#![crate_type = "rlib"]
-#![crate_type = "dylib"]
-#![unstable = "use the crates.io `regex` library instead"]
-#![staged_api]
-#![doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
- html_favicon_url = "http://www.rust-lang.org/favicon.ico",
- html_root_url = "http://doc.rust-lang.org/nightly/",
- html_playground_url = "http://play.rust-lang.org/")]
-
-#![allow(unknown_features)]
-#![allow(unstable)]
-#![feature(slicing_syntax)]
-#![feature(box_syntax)]
-#![allow(unknown_features)] #![feature(int_uint)]
-#![deny(missing_docs)]
-
-#[cfg(test)]
-extern crate "test" as stdtest;
-#[cfg(test)]
-extern crate rand;
-
-// During tests, this links with the `regex` crate so that the `regex!` macro
-// can be tested.
-#[cfg(test)]
-extern crate regex;
-
-// Unicode tables for character classes are defined in libunicode
-extern crate unicode;
-
-pub use parse::Error;
-pub use re::{Regex, Captures, SubCaptures, SubCapturesPos};
-pub use re::{FindCaptures, FindMatches};
-pub use re::{Replacer, NoExpand, RegexSplits, RegexSplitsN};
-pub use re::{quote, is_match};
-
-mod compile;
-mod parse;
-mod re;
-mod vm;
-
-#[cfg(test)]
-mod test;
-
-/// The `native` module exists to support the `regex!` macro. Do not use.
-#[doc(hidden)]
-pub mod native {
- // Exporting this stuff is bad form, but it's necessary for two reasons.
- // Firstly, the `regex!` syntax extension is in a different crate and
- // requires access to the representation of a regex (particularly the
- // instruction set) in order to compile to native Rust. This could be
- // mitigated if `regex!` was defined in the same crate, but this has
- // undesirable consequences (such as requiring a dependency on
- // `libsyntax`).
- //
- // Secondly, the code generated by `regex!` must *also* be able
- // to access various functions in this crate to reduce code duplication
- // and to provide a value with precisely the same `Regex` type in this
- // crate. This, AFAIK, is impossible to mitigate.
- //
- // On the bright side, `rustdoc` lets us hide this from the public API
- // documentation.
- pub use compile::{
- Program,
- OneChar, CharClass, Any, Save, Jump, Split,
- Match, EmptyBegin, EmptyEnd, EmptyWordBoundary,
- };
- pub use parse::{
- FLAG_EMPTY, FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL,
- FLAG_SWAP_GREED, FLAG_NEGATED,
- };
- pub use re::{Dynamic, ExDynamic, Native, ExNative};
- pub use vm::{
- MatchKind, Exists, Location, Submatches,
- StepState, StepMatchEarlyReturn, StepMatch, StepContinue,
- CharReader, find_prefix,
- };
-}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-pub use self::Ast::*;
-pub use self::Repeater::*;
-pub use self::Greed::*;
-use self::BuildAst::*;
-
-use std::char;
-use std::cmp;
-use std::fmt;
-use std::iter;
-use std::num;
-
-/// Static data containing Unicode ranges for general categories and scripts.
-use unicode::regex::{UNICODE_CLASSES, PERLD, PERLS, PERLW};
-
-/// The maximum number of repetitions allowed with the `{n,m}` syntax.
-static MAX_REPEAT: uint = 1000;
-
-/// Error corresponds to something that can go wrong while parsing
-/// a regular expression.
-///
-/// (Once an expression is compiled, it is not possible to produce an error
-/// via searching, splitting or replacing.)
-#[derive(Show)]
-pub struct Error {
- /// The *approximate* character index of where the error occurred.
- pub pos: uint,
- /// A message describing the error.
- pub msg: String,
-}
-
-impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "Regex syntax error near position {}: {:?}",
- self.pos, self.msg)
- }
-}
-
-/// Represents the abstract syntax of a regular expression.
-/// It is showable so that error messages resulting from a bug can provide
-/// useful information.
-/// It is cloneable so that expressions can be repeated for the counted
-/// repetition feature. (No other copying is done.)
-///
-/// Note that this representation prevents one from reproducing the regex as
-/// it was typed. (But it could be used to reproduce an equivalent regex.)
-#[derive(Show, Clone)]
-pub enum Ast {
- Nothing,
- Literal(char, Flags),
- Dot(Flags),
- AstClass(Vec<(char, char)>, Flags),
- Begin(Flags),
- End(Flags),
- WordBoundary(Flags),
- Capture(uint, Option<String>, Box<Ast>),
- // Represent concatenation as a flat vector to avoid blowing the
- // stack in the compiler.
- Cat(Vec<Ast>),
- Alt(Box<Ast>, Box<Ast>),
- Rep(Box<Ast>, Repeater, Greed),
-}
-
-#[derive(Show, PartialEq, Clone)]
-pub enum Repeater {
- ZeroOne,
- ZeroMore,
- OneMore,
-}
-
-#[derive(Copy, Show, Clone)]
-pub enum Greed {
- Greedy,
- Ungreedy,
-}
-
-impl Greed {
- pub fn is_greedy(&self) -> bool {
- match *self {
- Greedy => true,
- _ => false,
- }
- }
-
- fn swap(self, swapped: bool) -> Greed {
- if !swapped { return self }
- match self {
- Greedy => Ungreedy,
- Ungreedy => Greedy,
- }
- }
-}
-
-/// BuildAst is a regrettable type that represents intermediate state for
-/// constructing an abstract syntax tree. Its central purpose is to facilitate
-/// parsing groups and alternations while also maintaining a stack of flag
-/// state.
-#[derive(Show)]
-enum BuildAst {
- Expr(Ast),
- Paren(Flags, uint, String), // '('
- Bar, // '|'
-}
-
-impl BuildAst {
- fn paren(&self) -> bool {
- match *self {
- Paren(_, _, _) => true,
- _ => false,
- }
- }
-
- fn flags(&self) -> Flags {
- match *self {
- Paren(flags, _, _) => flags,
- _ => panic!("Cannot get flags from {:?}", self),
- }
- }
-
- fn capture(&self) -> Option<uint> {
- match *self {
- Paren(_, 0, _) => None,
- Paren(_, c, _) => Some(c),
- _ => panic!("Cannot get capture group from {:?}", self),
- }
- }
-
- fn capture_name(&self) -> Option<String> {
- match *self {
- Paren(_, 0, _) => None,
- Paren(_, _, ref name) => {
- if name.len() == 0 {
- None
- } else {
- Some(name.clone())
- }
- }
- _ => panic!("Cannot get capture name from {:?}", self),
- }
- }
-
- fn bar(&self) -> bool {
- match *self {
- Bar => true,
- _ => false,
- }
- }
-
- fn unwrap(self) -> Result<Ast, Error> {
- match self {
- Expr(x) => Ok(x),
- _ => panic!("Tried to unwrap non-AST item: {:?}", self),
- }
- }
-}
-
-/// Flags represents all options that can be twiddled by a user in an
-/// expression.
-pub type Flags = u8;
-
-pub const FLAG_EMPTY: u8 = 0;
-pub const FLAG_NOCASE: u8 = 1 << 0; // i
-pub const FLAG_MULTI: u8 = 1 << 1; // m
-pub const FLAG_DOTNL: u8 = 1 << 2; // s
-pub const FLAG_SWAP_GREED: u8 = 1 << 3; // U
-pub const FLAG_NEGATED: u8 = 1 << 4; // char class or not word boundary
-
-struct Parser<'a> {
- // The input, parsed only as a sequence of UTF8 code points.
- chars: Vec<char>,
- // The index of the current character in the input.
- chari: uint,
- // The intermediate state representing the AST.
- stack: Vec<BuildAst>,
- // The current set of flags.
- flags: Flags,
- // The total number of capture groups.
- // Incremented each time an opening left paren is seen (assuming it is
- // opening a capture group).
- caps: uint,
- // A set of all capture group names used only to detect duplicates.
- names: Vec<String>,
-}
-
-pub fn parse(s: &str) -> Result<Ast, Error> {
- Parser {
- chars: s.chars().collect(),
- chari: 0,
- stack: vec!(),
- flags: FLAG_EMPTY,
- caps: 0,
- names: vec!(),
- }.parse()
-}
-
-impl<'a> Parser<'a> {
- fn parse(&mut self) -> Result<Ast, Error> {
- if self.chars.len() == 0 {
- return Ok(Nothing);
- }
- loop {
- let c = self.cur();
- match c {
- '?' | '*' | '+' => try!(self.push_repeater(c)),
- '\\' => {
- let ast = try!(self.parse_escape());
- self.push(ast)
- }
- '{' => try!(self.parse_counted()),
- '[' => match self.try_parse_ascii() {
- None => try!(self.parse_class()),
- Some(class) => self.push(class),
- },
- '(' => {
- if self.peek_is(1, '?') {
- try!(self.expect('?'));
- try!(self.parse_group_opts())
- } else {
- self.caps += 1;
- self.stack.push(Paren(self.flags,
- self.caps,
- "".to_string()))
- }
- }
- ')' => {
- let catfrom = try!(
- self.pos_last(false, |x| x.paren() || x.bar()));
- try!(self.concat(catfrom));
-
- let altfrom = try!(self.pos_last(false, |x| x.paren()));
- // Before we smush the alternates together and pop off the
- // left paren, let's grab the old flags and see if we
- // need a capture.
- let (cap, cap_name, oldflags) = {
- let paren = &self.stack[altfrom-1];
- (paren.capture(), paren.capture_name(), paren.flags())
- };
- try!(self.alternate(altfrom));
- self.flags = oldflags;
-
- // If this was a capture, pop what we just pushed in
- // alternate and make it a capture.
- if cap.is_some() {
- let ast = try!(self.pop_ast());
- self.push(Capture(cap.unwrap(), cap_name, box ast));
- }
- }
- '|' => {
- let catfrom = try!(
- self.pos_last(true, |x| x.paren() || x.bar()));
- try!(self.concat(catfrom));
-
- self.stack.push(Bar);
- }
- _ => try!(self.push_literal(c)),
- }
- if !self.next_char() {
- break
- }
- }
-
- // Try to improve error handling. At this point, there should be
- // no remaining open parens.
- if self.stack.iter().any(|x| x.paren()) {
- return self.err("Unclosed parenthesis.")
- }
- let catfrom = try!(self.pos_last(true, |x| x.bar()));
- try!(self.concat(catfrom));
- try!(self.alternate(0));
-
- assert!(self.stack.len() == 1);
- self.pop_ast()
- }
-
- fn noteof(&mut self, expected: &str) -> Result<(), Error> {
- match self.next_char() {
- true => Ok(()),
- false => {
- self.err(&format!("Expected {:?} but got EOF.",
- expected)[])
- }
- }
- }
-
- fn expect(&mut self, expected: char) -> Result<(), Error> {
- match self.next_char() {
- true if self.cur() == expected => Ok(()),
- true => self.err(&format!("Expected '{:?}' but got '{:?}'.",
- expected, self.cur())[]),
- false => {
- self.err(&format!("Expected '{:?}' but got EOF.",
- expected)[])
- }
- }
- }
-
- fn next_char(&mut self) -> bool {
- self.chari += 1;
- self.chari < self.chars.len()
- }
-
- fn pop_ast(&mut self) -> Result<Ast, Error> {
- match self.stack.pop().unwrap().unwrap() {
- Err(e) => Err(e),
- Ok(ast) => Ok(ast),
- }
- }
-
- fn push(&mut self, ast: Ast) {
- self.stack.push(Expr(ast))
- }
-
- fn push_repeater(&mut self, c: char) -> Result<(), Error> {
- match self.stack.last() {
- Some(&Expr(..)) => (),
- // self.stack is empty, or the top item is not an Expr
- _ => return self.err("A repeat operator must be preceded by a valid expression."),
- }
- let rep: Repeater = match c {
- '?' => ZeroOne, '*' => ZeroMore, '+' => OneMore,
- _ => panic!("Not a valid repeater operator."),
- };
-
- match self.peek(1) {
- Some('*') | Some('+') =>
- return self.err(
- "Double repeat operators are not supported."),
- _ => {},
- }
- let ast = try!(self.pop_ast());
- match ast {
- Begin(_) | End(_) | WordBoundary(_) =>
- return self.err(
- "Repeat arguments cannot be empty width assertions."),
- _ => {}
- }
- let greed = try!(self.get_next_greedy());
- self.push(Rep(box ast, rep, greed));
- Ok(())
- }
-
- fn push_literal(&mut self, c: char) -> Result<(), Error> {
- let flags = self.flags;
- match c {
- '.' => {
- self.push(Dot(flags))
- }
- '^' => {
- self.push(Begin(flags))
- }
- '$' => {
- self.push(End(flags))
- }
- _ => {
- self.push(Literal(c, flags))
- }
- }
- Ok(())
- }
-
- // Parses all forms of character classes.
- // Assumes that '[' is the current character.
- fn parse_class(&mut self) -> Result<(), Error> {
- let negated =
- if self.peek_is(1, '^') {
- try!(self.expect('^'));
- FLAG_NEGATED
- } else {
- FLAG_EMPTY
- };
- let mut ranges: Vec<(char, char)> = vec!();
- let mut alts: Vec<Ast> = vec!();
-
- while self.peek_is(1, '-') {
- try!(self.expect('-'));
- ranges.push(('-', '-'))
- }
- loop {
- try!(self.noteof("a closing ']' or a non-empty character class)"));
- let mut c = self.cur();
- match c {
- '[' =>
- match self.try_parse_ascii() {
- Some(AstClass(asciis, flags)) => {
- alts.push(AstClass(asciis, flags ^ negated));
- continue
- }
- Some(ast) =>
- panic!("Expected Class AST but got '{:?}'", ast),
- // Just drop down and try to add as a regular character.
- None => {},
- },
- '\\' => {
- match try!(self.parse_escape()) {
- AstClass(asciis, flags) => {
- alts.push(AstClass(asciis, flags ^ negated));
- continue
- }
- Literal(c2, _) => c = c2, // process below
- Begin(_) | End(_) | WordBoundary(_) =>
- return self.err(
- "\\A, \\z, \\b and \\B are not valid escape \
- sequences inside a character class."),
- ast => panic!("Unexpected AST item '{:?}'", ast),
- }
- }
- ']' if ranges.len() > 0 || alts.len() > 0 => {
- if ranges.len() > 0 {
- let flags = negated | (self.flags & FLAG_NOCASE);
- let mut ast = AstClass(combine_ranges(ranges), flags);
- for alt in alts.into_iter() {
- ast = Alt(box alt, box ast)
- }
- self.push(ast);
- } else if alts.len() > 0 {
- let mut ast = alts.pop().unwrap();
- for alt in alts.into_iter() {
- ast = Alt(box alt, box ast)
- }
- self.push(ast);
- }
- return Ok(())
- }
- _ => {}
- }
-
- if self.peek_is(1, '-') && !self.peek_is(2, ']') {
- try!(self.expect('-'));
- // The regex can't end here.
- try!(self.noteof("not a ']'"));
- // End the range with a single character or character escape.
- let mut c2 = self.cur();
- if c2 == '\\' {
- match try!(self.parse_escape()) {
- Literal(c3, _) => c2 = c3, // allow literal escapes below
- ast =>
- return self.err(&format!("Expected a literal, but got {:?}.",
- ast)[]),
- }
- }
- if c2 < c {
- return self.err(&format!("Invalid character class \
- range '{}-{}'",
- c,
- c2)[])
- }
- ranges.push((c, self.cur()))
- } else {
- ranges.push((c, c))
- }
- }
- }
-
- // Tries to parse an ASCII character class of the form [:name:].
- // If successful, returns an AST character class corresponding to name
- // and moves the parser to the final ']' character.
- // If unsuccessful, no state is changed and None is returned.
- // Assumes that '[' is the current character.
- fn try_parse_ascii(&mut self) -> Option<Ast> {
- if !self.peek_is(1, ':') {
- return None
- }
- let closer =
- match self.pos(']') {
- Some(i) => i,
- None => return None,
- };
- if self.chars[closer-1] != ':' {
- return None
- }
- if closer - self.chari <= 3 {
- return None
- }
- let mut name_start = self.chari + 2;
- let negated =
- if self.peek_is(2, '^') {
- name_start += 1;
- FLAG_NEGATED
- } else {
- FLAG_EMPTY
- };
- let name = self.slice(name_start, closer - 1);
- match find_class(ASCII_CLASSES, &name[]) {
- None => None,
- Some(ranges) => {
- self.chari = closer;
- let flags = negated | (self.flags & FLAG_NOCASE);
- Some(AstClass(combine_ranges(ranges), flags))
- }
- }
- }
-
- // Parses counted repetition. Supports:
- // {n}, {n,}, {n,m}, {n}?, {n,}? and {n,m}?
- // Assumes that '{' is the current character.
- // Returns either an error or moves the parser to the final '}' character.
- // (Or the '?' character if not greedy.)
- fn parse_counted(&mut self) -> Result<(), Error> {
- // Scan until the closing '}' and grab the stuff in {}.
- let start = self.chari;
- let closer =
- match self.pos('}') {
- Some(i) => i,
- None => {
- return self.err(&format!("No closing brace for counted \
- repetition starting at position \
- {:?}.",
- start)[])
- }
- };
- self.chari = closer;
- let greed = try!(self.get_next_greedy());
- let inner = self.chars[start+1..closer].iter().cloned()
- .collect::<String>();
-
- // Parse the min and max values from the regex.
- let (mut min, mut max): (uint, Option<uint>);
- if !inner.contains(",") {
- min = try!(self.parse_uint(&inner[]));
- max = Some(min);
- } else {
- let pieces: Vec<&str> = inner.splitn(1, ',').collect();
- let (smin, smax) = (pieces[0], pieces[1]);
- if smin.len() == 0 {
- return self.err("Max repetitions cannot be specified \
- without min repetitions.")
- }
- min = try!(self.parse_uint(smin));
- max =
- if smax.len() == 0 {
- None
- } else {
- Some(try!(self.parse_uint(smax)))
- };
- }
-
- // Do some bounds checking and make sure max >= min.
- if min > MAX_REPEAT {
- return self.err(&format!(
- "{} exceeds maximum allowed repetitions ({})",
- min, MAX_REPEAT)[]);
- }
- if max.is_some() {
- let m = max.unwrap();
- if m > MAX_REPEAT {
- return self.err(&format!(
- "{} exceeds maximum allowed repetitions ({})",
- m, MAX_REPEAT)[]);
- }
- if m < min {
- return self.err(&format!(
- "Max repetitions ({}) cannot be smaller than min \
- repetitions ({}).", m, min)[]);
- }
- }
-
- // Now manipulate the AST be repeating elements.
- if max.is_none() {
- // Require N copies of what's on the stack and then repeat it.
- let ast = try!(self.pop_ast());
- for _ in iter::range(0, min) {
- self.push(ast.clone())
- }
- self.push(Rep(box ast, ZeroMore, greed));
- } else {
- // Require N copies of what's on the stack and then repeat it
- // up to M times optionally.
- let ast = try!(self.pop_ast());
- for _ in iter::range(0, min) {
- self.push(ast.clone())
- }
- if max.is_some() {
- for _ in iter::range(min, max.unwrap()) {
- self.push(Rep(box ast.clone(), ZeroOne, greed))
- }
- }
- // It's possible that we popped something off the stack but
- // never put anything back on it. To keep things simple, add
- // a no-op expression.
- if min == 0 && (max.is_none() || max == Some(0)) {
- self.push(Nothing)
- }
- }
- Ok(())
- }
-
- // Parses all escape sequences.
- // Assumes that '\' is the current character.
- fn parse_escape(&mut self) -> Result<Ast, Error> {
- try!(self.noteof("an escape sequence following a '\\'"));
-
- let c = self.cur();
- if is_punct(c) {
- return Ok(Literal(c, FLAG_EMPTY))
- }
- match c {
- 'a' => Ok(Literal('\x07', FLAG_EMPTY)),
- 'f' => Ok(Literal('\x0C', FLAG_EMPTY)),
- 't' => Ok(Literal('\t', FLAG_EMPTY)),
- 'n' => Ok(Literal('\n', FLAG_EMPTY)),
- 'r' => Ok(Literal('\r', FLAG_EMPTY)),
- 'v' => Ok(Literal('\x0B', FLAG_EMPTY)),
- 'A' => Ok(Begin(FLAG_EMPTY)),
- 'z' => Ok(End(FLAG_EMPTY)),
- 'b' => Ok(WordBoundary(FLAG_EMPTY)),
- 'B' => Ok(WordBoundary(FLAG_NEGATED)),
- '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => Ok(try!(self.parse_octal())),
- 'x' => Ok(try!(self.parse_hex())),
- 'p' | 'P' => Ok(try!(self.parse_unicode_name())),
- 'd' | 'D' | 's' | 'S' | 'w' | 'W' => {
- let ranges = perl_unicode_class(c);
- let mut flags = self.flags & FLAG_NOCASE;
- if c.is_uppercase() { flags |= FLAG_NEGATED }
- Ok(AstClass(ranges, flags))
- }
- _ => {
- self.err(&format!("Invalid escape sequence '\\\\{}'", c)[])
- }
- }
- }
-
- // Parses a Unicode character class name, either of the form \pF where
- // F is a one letter Unicode class name or of the form \p{name} where
- // name is the Unicode class name.
- // Assumes that \p or \P has been read (and 'p' or 'P' is the current
- // character).
- fn parse_unicode_name(&mut self) -> Result<Ast, Error> {
- let negated = if self.cur() == 'P' { FLAG_NEGATED } else { FLAG_EMPTY };
- let mut name: String;
- if self.peek_is(1, '{') {
- try!(self.expect('{'));
- let closer =
- match self.pos('}') {
- Some(i) => i,
- None => return self.err(&format!(
- "Missing '}}' for unclosed '{{' at position {}",
- self.chari)[]),
- };
- if closer - self.chari + 1 == 0 {
- return self.err("No Unicode class name found.")
- }
- name = self.slice(self.chari + 1, closer);
- self.chari = closer;
- } else {
- if self.chari + 1 >= self.chars.len() {
- return self.err("No single letter Unicode class name found.")
- }
- name = self.slice(self.chari + 1, self.chari + 2);
- self.chari += 1;
- }
- match find_class(UNICODE_CLASSES, &name[]) {
- None => {
- return self.err(&format!("Could not find Unicode class '{}'",
- name)[])
- }
- Some(ranges) => {
- Ok(AstClass(ranges, negated | (self.flags & FLAG_NOCASE)))
- }
- }
- }
-
- // Parses an octal number, up to 3 digits.
- // Assumes that \n has been read, where n is the first digit.
- fn parse_octal(&mut self) -> Result<Ast, Error> {
- let start = self.chari;
- let mut end = start + 1;
- let (d2, d3) = (self.peek(1), self.peek(2));
- if d2 >= Some('0') && d2 <= Some('7') {
- try!(self.noteof("expected octal character in [0-7]"));
- end += 1;
- if d3 >= Some('0') && d3 <= Some('7') {
- try!(self.noteof("expected octal character in [0-7]"));
- end += 1;
- }
- }
- let s = self.slice(start, end);
- match num::from_str_radix::<u32>(&s[], 8) {
- Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
- None => {
- self.err(&format!("Could not parse '{:?}' as octal number.",
- s)[])
- }
- }
- }
-
- // Parse a hex number. Either exactly two digits or anything in {}.
- // Assumes that \x has been read.
- fn parse_hex(&mut self) -> Result<Ast, Error> {
- if !self.peek_is(1, '{') {
- try!(self.expect('{'));
- return self.parse_hex_two()
- }
- let start = self.chari + 2;
- let closer =
- match self.pos('}') {
- None => {
- return self.err(&format!("Missing '}}' for unclosed \
- '{{' at position {}",
- start)[])
- }
- Some(i) => i,
- };
- self.chari = closer;
- self.parse_hex_digits(&self.slice(start, closer)[])
- }
-
- // Parses a two-digit hex number.
- // Assumes that \xn has been read, where n is the first digit and is the
- // current character.
- // After return, parser will point at the second digit.
- fn parse_hex_two(&mut self) -> Result<Ast, Error> {
- let (start, end) = (self.chari, self.chari + 2);
- let bad = self.slice(start - 2, self.chars.len());
- try!(self.noteof(format!("Invalid hex escape sequence '{}'",
- bad).as_slice()));
- self.parse_hex_digits(self.slice(start, end).as_slice())
- }
-
- // Parses `s` as a hexadecimal number.
- fn parse_hex_digits(&self, s: &str) -> Result<Ast, Error> {
- match num::from_str_radix::<u32>(s, 16) {
- Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
- None => {
- self.err(&format!("Could not parse '{}' as hex number.", s)[])
- }
- }
- }
-
- // Parses a named capture.
- // Assumes that '(?P<' has been consumed and that the current character
- // is '<'.
- // When done, parser will be at the closing '>' character.
- fn parse_named_capture(&mut self) -> Result<(), Error> {
- try!(self.noteof("a capture name"));
- let closer =
- match self.pos('>') {
- Some(i) => i,
- None => return self.err("Capture name must end with '>'."),
- };
- if closer - self.chari == 0 {
- return self.err("Capture names must have at least 1 character.")
- }
- let name = self.slice(self.chari, closer);
- if !name.chars().all(is_valid_cap) {
- return self.err(
- "Capture names can only have underscores, letters and digits.")
- }
- if self.names.contains(&name) {
- return self.err(&format!("Duplicate capture group name '{}'.",
- name)[])
- }
- self.names.push(name.clone());
- self.chari = closer;
- self.caps += 1;
- self.stack.push(Paren(self.flags, self.caps, name));
- Ok(())
- }
-
- // Parses non-capture groups and options.
- // Assumes that '(?' has already been consumed and '?' is the current
- // character.
- fn parse_group_opts(&mut self) -> Result<(), Error> {
- if self.peek_is(1, 'P') && self.peek_is(2, '<') {
- try!(self.expect('P'));
- try!(self.expect('<'));
- return self.parse_named_capture()
- }
- let start = self.chari;
- let mut flags = self.flags;
- let mut sign = 1i;
- let mut saw_flag = false;
- loop {
- try!(self.noteof(
- "expected non-empty set of flags or closing ')'"));
- match self.cur() {
- 'i' => { flags = flags | FLAG_NOCASE; saw_flag = true},
- 'm' => { flags = flags | FLAG_MULTI; saw_flag = true},
- 's' => { flags = flags | FLAG_DOTNL; saw_flag = true},
- 'U' => { flags = flags | FLAG_SWAP_GREED; saw_flag = true},
- '-' => {
- if sign < 0 {
- return self.err(&format!(
- "Cannot negate flags twice in '{}'.",
- self.slice(start, self.chari + 1))[])
- }
- sign = -1;
- saw_flag = false;
- flags = flags ^ flags;
- }
- ':' | ')' => {
- if sign < 0 {
- if !saw_flag {
- return self.err(&format!(
- "A valid flag does not follow negation in '{}'",
- self.slice(start, self.chari + 1))[])
- }
- flags = flags ^ flags;
- }
- if self.cur() == ':' {
- // Save the old flags with the opening paren.
- self.stack.push(Paren(self.flags, 0, "".to_string()));
- }
- self.flags = flags;
- return Ok(())
- }
- _ => return self.err(&format!(
- "Unrecognized flag '{}'.", self.cur())[]),
- }
- }
- }
-
- // Peeks at the next character and returns whether it's ungreedy or not.
- // If it is, then the next character is consumed.
- fn get_next_greedy(&mut self) -> Result<Greed, Error> {
- Ok(if self.peek_is(1, '?') {
- try!(self.expect('?'));
- Ungreedy
- } else {
- Greedy
- }.swap(self.flags & FLAG_SWAP_GREED > 0))
- }
-
- // Searches the stack (starting at the top) until it finds an expression
- // for which `pred` returns true. The index of that expression in the
- // stack is returned.
- // If there's no match, then one of two things happens depending on the
- // values of `allow_start`. When it's true, then `0` will be returned.
- // Otherwise, an error will be returned.
- // Generally, `allow_start` is only true when you're *not* expecting an
- // opening parenthesis.
- fn pos_last<P>(&self, allow_start: bool, pred: P) -> Result<uint, Error> where
- P: FnMut(&BuildAst) -> bool,
- {
- let from = match self.stack.iter().rev().position(pred) {
- Some(i) => i,
- None => {
- if allow_start {
- self.stack.len()
- } else {
- return self.err("No matching opening parenthesis.")
- }
- }
- };
- // Adjust index since 'from' is for the reversed stack.
- // Also, don't include the '(' or '|'.
- Ok(self.stack.len() - from)
- }
-
- // concat starts at `from` in the parser's stack and concatenates all
- // expressions up to the top of the stack. The resulting concatenation is
- // then pushed on to the stack.
- // Usually `from` corresponds to the position of an opening parenthesis,
- // a '|' (alternation) or the start of the entire expression.
- fn concat(&mut self, from: uint) -> Result<(), Error> {
- let ast = try!(self.build_from(from, concat_flatten));
- self.push(ast);
- Ok(())
- }
-
- // concat starts at `from` in the parser's stack and alternates all
- // expressions up to the top of the stack. The resulting alternation is
- // then pushed on to the stack.
- // Usually `from` corresponds to the position of an opening parenthesis
- // or the start of the entire expression.
- // This will also drop any opening parens or alternation bars found in
- // the intermediate AST.
- fn alternate(&mut self, mut from: uint) -> Result<(), Error> {
- // Unlike in the concatenation case, we want 'build_from' to continue
- // all the way to the opening left paren (so it will be popped off and
- // thrown away). But be careful with overflow---we can't count on the
- // open paren to be there.
- if from > 0 { from = from - 1}
- let ast = try!(self.build_from(from, |l,r| Alt(box l, box r)));
- self.push(ast);
- Ok(())
- }
-
- // build_from combines all AST elements starting at 'from' in the
- // parser's stack using 'mk' to combine them. If any such element is not an
- // AST then it is popped off the stack and ignored.
- fn build_from<F>(&mut self, from: uint, mut mk: F) -> Result<Ast, Error> where
- F: FnMut(Ast, Ast) -> Ast,
- {
- if from >= self.stack.len() {
- return self.err("Empty group or alternate not allowed.")
- }
-
- let mut combined = try!(self.pop_ast());
- let mut i = self.stack.len();
- while i > from {
- i = i - 1;
- match self.stack.pop().unwrap() {
- Expr(x) => combined = mk(x, combined),
- _ => {},
- }
- }
- Ok(combined)
- }
-
- fn parse_uint(&self, s: &str) -> Result<uint, Error> {
- match s.parse::<uint>() {
- Some(i) => Ok(i),
- None => {
- self.err(&format!("Expected an unsigned integer but got '{}'.",
- s)[])
- }
- }
- }
-
- fn char_from_u32(&self, n: u32) -> Result<char, Error> {
- match char::from_u32(n) {
- Some(c) => Ok(c),
- None => {
- self.err(&format!("Could not decode '{}' to unicode \
- character.", n)[])
- }
- }
- }
-
- fn pos(&self, c: char) -> Option<uint> {
- self.chars.iter()
- .skip(self.chari).position(|&c2| c2 == c).map(|i| self.chari + i)
- }
-
- fn err<T>(&self, msg: &str) -> Result<T, Error> {
- Err(Error {
- pos: self.chari,
- msg: msg.to_string(),
- })
- }
-
- fn peek(&self, offset: uint) -> Option<char> {
- if self.chari + offset >= self.chars.len() {
- return None
- }
- Some(self.chars[self.chari + offset])
- }
-
- fn peek_is(&self, offset: uint, is: char) -> bool {
- self.peek(offset) == Some(is)
- }
-
- fn cur(&self) -> char {
- self.chars[self.chari]
- }
-
- fn slice(&self, start: uint, end: uint) -> String {
- self.chars[start..end].iter().cloned().collect()
- }
-}
-
-// Given an unordered collection of character ranges, combine_ranges returns
-// an ordered sequence of character ranges where no two ranges overlap. They
-// are ordered from least to greatest (using start position).
-fn combine_ranges(unordered: Vec<(char, char)>) -> Vec<(char, char)> {
- // Returns true iff the two character classes overlap or share a boundary.
- // e.g., ('a', 'g') and ('h', 'm') would return true.
- fn should_merge((a, b): (char, char), (x, y): (char, char)) -> bool {
- cmp::max(a, x) as u32 <= cmp::min(b, y) as u32 + 1
- }
-
- // This is currently O(n^2), but I think with sufficient cleverness,
- // it can be reduced to O(n) **if necessary**.
- let mut ordered: Vec<(char, char)> = Vec::with_capacity(unordered.len());
- for (us, ue) in unordered.into_iter() {
- let (mut us, mut ue) = (us, ue);
- assert!(us <= ue);
- let mut which: Option<uint> = None;
- for (i, &(os, oe)) in ordered.iter().enumerate() {
- if should_merge((us, ue), (os, oe)) {
- us = cmp::min(us, os);
- ue = cmp::max(ue, oe);
- which = Some(i);
- break
- }
- }
- match which {
- None => ordered.push((us, ue)),
- Some(i) => ordered[i] = (us, ue),
- }
- }
- ordered.sort();
- ordered
-}
-
-// Constructs a Unicode friendly Perl character class from \d, \s or \w
-// (or any of their negated forms). Note that this does not handle negation.
-fn perl_unicode_class(which: char) -> Vec<(char, char)> {
- match which.to_lowercase() {
- 'd' => PERLD.to_vec(),
- 's' => PERLS.to_vec(),
- 'w' => PERLW.to_vec(),
- _ => unreachable!(),
- }
-}
-
-// Returns a concatenation of two expressions. This also guarantees that a
-// `Cat` expression will never be a direct child of another `Cat` expression.
-fn concat_flatten(x: Ast, y: Ast) -> Ast {
- match (x, y) {
- (Cat(mut xs), Cat(ys)) => { xs.extend(ys.into_iter()); Cat(xs) }
- (Cat(mut xs), ast) => { xs.push(ast); Cat(xs) }
- (ast, Cat(mut xs)) => { xs.insert(0, ast); Cat(xs) }
- (ast1, ast2) => Cat(vec!(ast1, ast2)),
- }
-}
-
-pub fn is_punct(c: char) -> bool {
- match c {
- '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' |
- '[' | ']' | '{' | '}' | '^' | '$' => true,
- _ => false,
- }
-}
-
-fn is_valid_cap(c: char) -> bool {
- c == '_' || (c >= '0' && c <= '9')
- || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-}
-
-fn find_class(classes: NamedClasses, name: &str) -> Option<Vec<(char, char)>> {
- match classes.binary_search_by(|&(s, _)| s.cmp(name)) {
- Ok(i) => Some(classes[i].1.to_vec()),
- Err(_) => None,
- }
-}
-
-type Class = &'static [(char, char)];
-type NamedClasses = &'static [(&'static str, &'static Class)];
-
-static ASCII_CLASSES: NamedClasses = &[
- // Classes must be in alphabetical order so that bsearch works.
- // [:alnum:] alphanumeric (== [0-9A-Za-z])
- // [:alpha:] alphabetic (== [A-Za-z])
- // [:ascii:] ASCII (== [\x00-\x7F])
- // [:blank:] blank (== [\t ])
- // [:cntrl:] control (== [\x00-\x1F\x7F])
- // [:digit:] digits (== [0-9])
- // [:graph:] graphical (== [!-~])
- // [:lower:] lower case (== [a-z])
- // [:print:] printable (== [ -~] == [ [:graph:]])
- // [:punct:] punctuation (== [!-/:-@[-`{-~])
- // [:space:] whitespace (== [\t\n\v\f\r ])
- // [:upper:] upper case (== [A-Z])
- // [:word:] word characters (== [0-9A-Za-z_])
- // [:xdigit:] hex digit (== [0-9A-Fa-f])
- // Taken from: http://golang.org/pkg/regex/syntax/
- ("alnum", &ALNUM),
- ("alpha", &ALPHA),
- ("ascii", &ASCII),
- ("blank", &BLANK),
- ("cntrl", &CNTRL),
- ("digit", &DIGIT),
- ("graph", &GRAPH),
- ("lower", &LOWER),
- ("print", &PRINT),
- ("punct", &PUNCT),
- ("space", &SPACE),
- ("upper", &UPPER),
- ("word", &WORD),
- ("xdigit", &XDIGIT),
-];
-
-static ALNUM: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z')];
-static ALPHA: Class = &[('A', 'Z'), ('a', 'z')];
-static ASCII: Class = &[('\x00', '\x7F')];
-static BLANK: Class = &[(' ', ' '), ('\t', '\t')];
-static CNTRL: Class = &[('\x00', '\x1F'), ('\x7F', '\x7F')];
-static DIGIT: Class = &[('0', '9')];
-static GRAPH: Class = &[('!', '~')];
-static LOWER: Class = &[('a', 'z')];
-static PRINT: Class = &[(' ', '~')];
-static PUNCT: Class = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')];
-static SPACE: Class = &[('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'),
- ('\x0C', '\x0C'), ('\r', '\r'), (' ', ' ')];
-static UPPER: Class = &[('A', 'Z')];
-static WORD: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z'), ('_', '_')];
-static XDIGIT: Class = &[('0', '9'), ('A', 'F'), ('a', 'f')];
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-pub use self::NamesIter::*;
-pub use self::Regex::*;
-
-use std::borrow::IntoCow;
-use std::collections::HashMap;
-use std::fmt;
-use std::string::CowString;
-
-use compile::Program;
-use parse;
-use vm;
-use vm::{CaptureLocs, MatchKind, Exists, Location, Submatches};
-
-/// Escapes all regular expression meta characters in `text`.
-///
-/// The string returned may be safely used as a literal in a regular
-/// expression.
-pub fn quote(text: &str) -> String {
- let mut quoted = String::with_capacity(text.len());
- for c in text.chars() {
- if parse::is_punct(c) {
- quoted.push('\\')
- }
- quoted.push(c);
- }
- quoted
-}
-
-/// Tests if the given regular expression matches somewhere in the text given.
-///
-/// If there was a problem compiling the regular expression, an error is
-/// returned.
-///
-/// To find submatches, split or replace text, you'll need to compile an
-/// expression first.
-///
-/// Note that you should prefer the `regex!` macro when possible. For example,
-/// `regex!("...").is_match("...")`.
-pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
- Regex::new(regex).map(|r| r.is_match(text))
-}
-
-/// A compiled regular expression
-#[derive(Clone)]
-pub enum Regex {
- // The representation of `Regex` is exported to support the `regex!`
- // syntax extension. Do not rely on it.
- //
- // See the comments for the `program` module in `lib.rs` for a more
- // detailed explanation for what `regex!` requires.
- #[doc(hidden)]
- Dynamic(ExDynamic),
- #[doc(hidden)]
- Native(ExNative),
-}
-
-#[derive(Clone)]
-#[doc(hidden)]
-pub struct ExDynamic {
- original: String,
- names: Vec<Option<String>>,
- #[doc(hidden)]
- pub prog: Program
-}
-
-#[doc(hidden)]
-#[derive(Copy)]
-pub struct ExNative {
- #[doc(hidden)]
- pub original: &'static str,
- #[doc(hidden)]
- pub names: &'static &'static [Option<&'static str>],
- #[doc(hidden)]
- pub prog: fn(MatchKind, &str, uint, uint) -> Vec<Option<uint>>
-}
-
-impl Clone for ExNative {
- fn clone(&self) -> ExNative {
- *self
- }
-}
-
-impl fmt::Display for Regex {
- /// Shows the original regular expression.
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- fmt::Display::fmt(self.as_str(), f)
- }
-}
-
-impl Regex {
- /// Compiles a dynamic regular expression. Once compiled, it can be
- /// used repeatedly to search, split or replace text in a string.
- ///
- /// When possible, you should prefer the `regex!` macro since it is
- /// safer and always faster.
- ///
- /// If an invalid expression is given, then an error is returned.
- pub fn new(re: &str) -> Result<Regex, parse::Error> {
- let ast = try!(parse::parse(re));
- let (prog, names) = Program::new(ast);
- Ok(Dynamic(ExDynamic {
- original: re.to_string(),
- names: names,
- prog: prog,
- }))
- }
-
- /// Returns true if and only if the regex matches the string given.
- pub fn is_match(&self, text: &str) -> bool {
- has_match(&exec(self, Exists, text))
- }
-
- /// Returns the start and end byte range of the leftmost-first match in
- /// `text`. If no match exists, then `None` is returned.
- pub fn find(&self, text: &str) -> Option<(uint, uint)> {
- let caps = exec(self, Location, text);
- if has_match(&caps) {
- Some((caps[0].unwrap(), caps[1].unwrap()))
- } else {
- None
- }
- }
-
- /// Returns an iterator for each successive non-overlapping match in
- /// `text`, returning the start and end byte indices with respect to
- /// `text`.
- pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> {
- FindMatches {
- re: self,
- search: text,
- last_end: 0,
- last_match: None,
- }
- }
-
- /// Returns the capture groups corresponding to the leftmost-first
- /// match in `text`. Capture group `0` always corresponds to the entire
- /// match. If no match is found, then `None` is returned.
- ///
- /// You should only use `captures` if you need access to submatches.
- /// Otherwise, `find` is faster for discovering the location of the overall
- /// match.
- pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
- let caps = exec(self, Submatches, text);
- Captures::new(self, text, caps)
- }
-
- /// Returns an iterator over all the non-overlapping capture groups matched
- /// in `text`. This is operationally the same as `find_iter` (except it
- /// yields information about submatches).
- pub fn captures_iter<'r, 't>(&'r self, text: &'t str)
- -> FindCaptures<'r, 't> {
- FindCaptures {
- re: self,
- search: text,
- last_match: None,
- last_end: 0,
- }
- }
-
- /// Returns an iterator of substrings of `text` delimited by a match
- /// of the regular expression.
- /// Namely, each element of the iterator corresponds to text that *isn't*
- /// matched by the regular expression.
- ///
- /// This method will *not* copy the text given.
- pub fn split<'r, 't>(&'r self, text: &'t str) -> RegexSplits<'r, 't> {
- RegexSplits {
- finder: self.find_iter(text),
- last: 0,
- }
- }
-
- /// Returns an iterator of at most `limit` substrings of `text` delimited
- /// by a match of the regular expression. (A `limit` of `0` will return no
- /// substrings.)
- /// Namely, each element of the iterator corresponds to text that *isn't*
- /// matched by the regular expression.
- /// The remainder of the string that is not split will be the last element
- /// in the iterator.
- ///
- /// This method will *not* copy the text given.
- pub fn splitn<'r, 't>(&'r self, text: &'t str, limit: uint)
- -> RegexSplitsN<'r, 't> {
- RegexSplitsN {
- splits: self.split(text),
- cur: 0,
- limit: limit,
- }
- }
-
- /// Replaces the leftmost-first match with the replacement provided.
- /// The replacement can be a regular string (where `$N` and `$name` are
- /// expanded to match capture groups) or a function that takes the matches'
- /// `Captures` and returns the replaced string.
- ///
- /// If no match is found, then a copy of the string is returned unchanged.
- pub fn replace<R: Replacer>(&self, text: &str, rep: R) -> String {
- self.replacen(text, 1, rep)
- }
-
- /// Replaces all non-overlapping matches in `text` with the
- /// replacement provided. This is the same as calling `replacen` with
- /// `limit` set to `0`.
- ///
- /// See the documentation for `replace` for details on how to access
- /// submatches in the replacement string.
- pub fn replace_all<R: Replacer>(&self, text: &str, rep: R) -> String {
- self.replacen(text, 0, rep)
- }
-
- /// Replaces at most `limit` non-overlapping matches in `text` with the
- /// replacement provided. If `limit` is 0, then all non-overlapping matches
- /// are replaced.
- ///
- /// See the documentation for `replace` for details on how to access
- /// submatches in the replacement string.
- pub fn replacen<R: Replacer>
- (&self, text: &str, limit: uint, mut rep: R) -> String {
- let mut new = String::with_capacity(text.len());
- let mut last_match = 0u;
-
- for (i, cap) in self.captures_iter(text).enumerate() {
- // It'd be nicer to use the 'take' iterator instead, but it seemed
- // awkward given that '0' => no limit.
- if limit > 0 && i >= limit {
- break
- }
-
- let (s, e) = cap.pos(0).unwrap(); // captures only reports matches
- new.push_str(&text[last_match..s]);
- new.push_str(&rep.reg_replace(&cap)[]);
- last_match = e;
- }
- new.push_str(&text[last_match..text.len()]);
- return new;
- }
-
- /// Returns the original string of this regex.
- pub fn as_str<'a>(&'a self) -> &'a str {
- match *self {
- Dynamic(ExDynamic { ref original, .. }) => &original[],
- Native(ExNative { ref original, .. }) => &original[],
- }
- }
-
- #[doc(hidden)]
- #[unstable]
- pub fn names_iter<'a>(&'a self) -> NamesIter<'a> {
- match *self {
- Native(ref n) => NamesIterNative(n.names.iter()),
- Dynamic(ref d) => NamesIterDynamic(d.names.iter())
- }
- }
-
- fn names_len(&self) -> uint {
- match *self {
- Native(ref n) => n.names.len(),
- Dynamic(ref d) => d.names.len()
- }
- }
-
-}
-
-#[derive(Clone)]
-pub enum NamesIter<'a> {
- NamesIterNative(::std::slice::Iter<'a, Option<&'static str>>),
- NamesIterDynamic(::std::slice::Iter<'a, Option<String>>)
-}
-
-impl<'a> Iterator for NamesIter<'a> {
- type Item = Option<String>;
-
- fn next(&mut self) -> Option<Option<String>> {
- match *self {
- NamesIterNative(ref mut i) => i.next().map(|x| x.map(|s| s.to_string())),
- NamesIterDynamic(ref mut i) => i.next().map(|x| x.as_ref().map(|s| s.to_string())),
- }
- }
-}
-
-/// NoExpand indicates literal string replacement.
-///
-/// It can be used with `replace` and `replace_all` to do a literal
-/// string replacement without expanding `$name` to their corresponding
-/// capture groups.
-///
-/// `'r` is the lifetime of the literal text.
-pub struct NoExpand<'t>(pub &'t str);
-
-/// Replacer describes types that can be used to replace matches in a string.
-pub trait Replacer {
- /// Returns a possibly owned string that is used to replace the match
- /// corresponding to the `caps` capture group.
- ///
- /// The `'a` lifetime refers to the lifetime of a borrowed string when
- /// a new owned string isn't needed (e.g., for `NoExpand`).
- fn reg_replace<'a>(&'a mut self, caps: &Captures) -> CowString<'a>;
-}
-
-impl<'t> Replacer for NoExpand<'t> {
- fn reg_replace<'a>(&'a mut self, _: &Captures) -> CowString<'a> {
- let NoExpand(s) = *self;
- s.into_cow()
- }
-}
-
-impl<'t> Replacer for &'t str {
- fn reg_replace<'a>(&'a mut self, caps: &Captures) -> CowString<'a> {
- caps.expand(*self).into_cow()
- }
-}
-
-impl<F> Replacer for F where F: FnMut(&Captures) -> String {
- fn reg_replace<'a>(&'a mut self, caps: &Captures) -> CowString<'a> {
- (*self)(caps).into_cow()
- }
-}
-
-/// Yields all substrings delimited by a regular expression match.
-///
-/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
-/// of the string being split.
-#[derive(Clone)]
-pub struct RegexSplits<'r, 't> {
- finder: FindMatches<'r, 't>,
- last: uint,
-}
-
-impl<'r, 't> Iterator for RegexSplits<'r, 't> {
- type Item = &'t str;
-
- fn next(&mut self) -> Option<&'t str> {
- let text = self.finder.search;
- match self.finder.next() {
- None => {
- if self.last >= text.len() {
- None
- } else {
- let s = &text[self.last..text.len()];
- self.last = text.len();
- Some(s)
- }
- }
- Some((s, e)) => {
- let matched = &text[self.last..s];
- self.last = e;
- Some(matched)
- }
- }
- }
-}
-
-/// Yields at most `N` substrings delimited by a regular expression match.
-///
-/// The last substring will be whatever remains after splitting.
-///
-/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
-/// of the string being split.
-#[derive(Clone)]
-pub struct RegexSplitsN<'r, 't> {
- splits: RegexSplits<'r, 't>,
- cur: uint,
- limit: uint,
-}
-
-impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
- type Item = &'t str;
-
- fn next(&mut self) -> Option<&'t str> {
- let text = self.splits.finder.search;
- if self.cur >= self.limit {
- None
- } else {
- self.cur += 1;
- if self.cur >= self.limit {
- Some(&text[self.splits.last..text.len()])
- } else {
- self.splits.next()
- }
- }
- }
-}
-
-/// Captures represents a group of captured strings for a single match.
-///
-/// The 0th capture always corresponds to the entire match. Each subsequent
-/// index corresponds to the next capture group in the regex.
-/// If a capture group is named, then the matched string is *also* available
-/// via the `name` method. (Note that the 0th capture is always unnamed and so
-/// must be accessed with the `at` method.)
-///
-/// Positions returned from a capture group are always byte indices.
-///
-/// `'t` is the lifetime of the matched text.
-pub struct Captures<'t> {
- text: &'t str,
- locs: CaptureLocs,
- named: Option<HashMap<String, uint>>,
-}
-
-impl<'t> Captures<'t> {
- #[allow(unstable)]
- fn new(re: &Regex, search: &'t str, locs: CaptureLocs)
- -> Option<Captures<'t>> {
- if !has_match(&locs) {
- return None
- }
-
- let named =
- if re.names_len() == 0 {
- None
- } else {
- let mut named = HashMap::new();
- for (i, name) in re.names_iter().enumerate() {
- match name {
- None => {},
- Some(name) => {
- named.insert(name, i);
- }
- }
- }
- Some(named)
- };
- Some(Captures {
- text: search,
- locs: locs,
- named: named,
- })
- }
-
- /// Returns the start and end positions of the Nth capture group.
- /// Returns `None` if `i` is not a valid capture group or if the capture
- /// group did not match anything.
- /// The positions returned are *always* byte indices with respect to the
- /// original string matched.
- pub fn pos(&self, i: uint) -> Option<(uint, uint)> {
- let (s, e) = (i * 2, i * 2 + 1);
- if e >= self.locs.len() || self.locs[s].is_none() {
- // VM guarantees that each pair of locations are both Some or None.
- return None
- }
- Some((self.locs[s].unwrap(), self.locs[e].unwrap()))
- }
-
- /// Returns the matched string for the capture group `i`. If `i` isn't
- /// a valid capture group or didn't match anything, then `None` is
- /// returned.
- pub fn at(&self, i: uint) -> Option<&'t str> {
- match self.pos(i) {
- None => None,
- Some((s, e)) => Some(&self.text[s.. e])
- }
- }
-
- /// Returns the matched string for the capture group named `name`. If
- /// `name` isn't a valid capture group or didn't match anything, then
- /// `None` is returned.
- pub fn name(&self, name: &str) -> Option<&'t str> {
- match self.named {
- None => None,
- Some(ref h) => {
- match h.get(name) {
- None => None,
- Some(i) => self.at(*i),
- }
- }
- }
- }
-
- /// Creates an iterator of all the capture groups in order of appearance
- /// in the regular expression.
- pub fn iter(&'t self) -> SubCaptures<'t> {
- SubCaptures { idx: 0, caps: self, }
- }
-
- /// Creates an iterator of all the capture group positions in order of
- /// appearance in the regular expression. Positions are byte indices
- /// in terms of the original string matched.
- pub fn iter_pos(&'t self) -> SubCapturesPos<'t> {
- SubCapturesPos { idx: 0, caps: self, }
- }
-
- /// Expands all instances of `$name` in `text` to the corresponding capture
- /// group `name`.
- ///
- /// `name` may be an integer corresponding to the index of the
- /// capture group (counted by order of opening parenthesis where `0` is the
- /// entire match) or it can be a name (consisting of letters, digits or
- /// underscores) corresponding to a named capture group.
- ///
- /// If `name` isn't a valid capture group (whether the name doesn't exist or
- /// isn't a valid index), then it is replaced with the empty string.
- ///
- /// To write a literal `$` use `$$`.
- pub fn expand(&self, text: &str) -> String {
- // How evil can you get?
- // FIXME: Don't use regexes for this. It's completely unnecessary.
- let re = Regex::new(r"(^|[^$]|\b)\$(\w+)").unwrap();
- let text = re.replace_all(text, |&mut: refs: &Captures| -> String {
- let pre = refs.at(1).unwrap_or("");
- let name = refs.at(2).unwrap_or("");
- format!("{}{}", pre,
- match name.parse::<uint>() {
- None => self.name(name).unwrap_or("").to_string(),
- Some(i) => self.at(i).unwrap_or("").to_string(),
- })
- });
- let re = Regex::new(r"\$\$").unwrap();
- re.replace_all(&text[], NoExpand("$"))
- }
-
- /// Returns the number of captured groups.
- #[inline]
- pub fn len(&self) -> uint { self.locs.len() / 2 }
-
- /// Returns if there are no captured groups.
- #[inline]
- pub fn is_empty(&self) -> bool { self.len() == 0 }
-}
-
-/// An iterator over capture groups for a particular match of a regular
-/// expression.
-///
-/// `'t` is the lifetime of the matched text.
-#[derive(Clone)]
-pub struct SubCaptures<'t> {
- idx: uint,
- caps: &'t Captures<'t>,
-}
-
-impl<'t> Iterator for SubCaptures<'t> {
- type Item = &'t str;
-
- fn next(&mut self) -> Option<&'t str> {
- if self.idx < self.caps.len() {
- self.idx += 1;
- Some(self.caps.at(self.idx - 1).unwrap_or(""))
- } else {
- None
- }
- }
-}
-
-/// An iterator over capture group positions for a particular match of a
-/// regular expression.
-///
-/// Positions are byte indices in terms of the original string matched.
-///
-/// `'t` is the lifetime of the matched text.
-#[derive(Clone)]
-pub struct SubCapturesPos<'t> {
- idx: uint,
- caps: &'t Captures<'t>,
-}
-
-impl<'t> Iterator for SubCapturesPos<'t> {
- type Item = Option<(uint, uint)>;
-
- fn next(&mut self) -> Option<Option<(uint, uint)>> {
- if self.idx < self.caps.len() {
- self.idx += 1;
- Some(self.caps.pos(self.idx - 1))
- } else {
- None
- }
- }
-}
-
-/// An iterator that yields all non-overlapping capture groups matching a
-/// particular regular expression.
-///
-/// The iterator stops when no more matches can be found.
-///
-/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
-/// of the matched string.
-#[derive(Clone)]
-pub struct FindCaptures<'r, 't> {
- re: &'r Regex,
- search: &'t str,
- last_match: Option<uint>,
- last_end: uint,
-}
-
-impl<'r, 't> Iterator for FindCaptures<'r, 't> {
- type Item = Captures<'t>;
-
- fn next(&mut self) -> Option<Captures<'t>> {
- if self.last_end > self.search.len() {
- return None
- }
-
- let caps = exec_slice(self.re, Submatches, self.search,
- self.last_end, self.search.len());
- let (s, e) =
- if !has_match(&caps) {
- return None
- } else {
- (caps[0].unwrap(), caps[1].unwrap())
- };
-
- // Don't accept empty matches immediately following a match.
- // i.e., no infinite loops please.
- if e == s && Some(self.last_end) == self.last_match {
- self.last_end += 1;
- return self.next()
- }
- self.last_end = e;
- self.last_match = Some(self.last_end);
- Captures::new(self.re, self.search, caps)
- }
-}
-
-/// An iterator over all non-overlapping matches for a particular string.
-///
-/// The iterator yields a tuple of integers corresponding to the start and end
-/// of the match. The indices are byte offsets. The iterator stops when no more
-/// matches can be found.
-///
-/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
-/// of the matched string.
-#[derive(Clone)]
-pub struct FindMatches<'r, 't> {
- re: &'r Regex,
- search: &'t str,
- last_match: Option<uint>,
- last_end: uint,
-}
-
-impl<'r, 't> Iterator for FindMatches<'r, 't> {
- type Item = (uint, uint);
-
- fn next(&mut self) -> Option<(uint, uint)> {
- if self.last_end > self.search.len() {
- return None
- }
-
- let caps = exec_slice(self.re, Location, self.search,
- self.last_end, self.search.len());
- let (s, e) =
- if !has_match(&caps) {
- return None
- } else {
- (caps[0].unwrap(), caps[1].unwrap())
- };
-
- // Don't accept empty matches immediately following a match.
- // i.e., no infinite loops please.
- if e == s && Some(self.last_end) == self.last_match {
- self.last_end += 1;
- return self.next()
- }
- self.last_end = e;
- self.last_match = Some(self.last_end);
- Some((s, e))
- }
-}
-
-fn exec(re: &Regex, which: MatchKind, input: &str) -> CaptureLocs {
- exec_slice(re, which, input, 0, input.len())
-}
-
-fn exec_slice(re: &Regex, which: MatchKind,
- input: &str, s: uint, e: uint) -> CaptureLocs {
- match *re {
- Dynamic(ExDynamic { ref prog, .. }) => vm::run(which, prog, input, s, e),
- Native(ExNative { ref prog, .. }) => (*prog)(which, input, s, e),
- }
-}
-
-#[inline]
-fn has_match(caps: &CaptureLocs) -> bool {
- caps.len() >= 2 && caps[0].is_some() && caps[1].is_some()
-}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-#![allow(non_snake_case)]
-
-use std::rand::{Rng, thread_rng};
-use stdtest::Bencher;
-use std::iter::repeat;
-
-use regex::{Regex, NoExpand};
-
-fn bench_assert_match(b: &mut Bencher, re: Regex, text: &str) {
- b.iter(|| if !re.is_match(text) { panic!("no match") });
-}
-
-#[bench]
-fn no_exponential(b: &mut Bencher) {
- let n = 100;
- let re = Regex::new(format!("{}{}",
- repeat("a?").take(n).collect::<String>(),
- repeat("a").take(n).collect::<String>()).as_slice()).unwrap();
- let text = repeat("a").take(n).collect::<String>();
- bench_assert_match(b, re, text.as_slice());
-}
-
-#[bench]
-fn literal(b: &mut Bencher) {
- let re = regex!("y");
- let text = format!("{}y", repeat("x").take(50).collect::<String>());
- bench_assert_match(b, re, text.as_slice());
-}
-
-#[bench]
-fn not_literal(b: &mut Bencher) {
- let re = regex!(".y");
- let text = format!("{}y", repeat("x").take(50).collect::<String>());
- bench_assert_match(b, re, text.as_slice());
-}
-
-#[bench]
-fn match_class(b: &mut Bencher) {
- let re = regex!("[abcdw]");
- let text = format!("{}w", repeat("xxxx").take(20).collect::<String>());
- bench_assert_match(b, re, text.as_slice());
-}
-
-#[bench]
-fn match_class_in_range(b: &mut Bencher) {
- // 'b' is between 'a' and 'c', so the class range checking doesn't help.
- let re = regex!("[ac]");
- let text = format!("{}c", repeat("bbbb").take(20).collect::<String>());
- bench_assert_match(b, re, text.as_slice());
-}
-
-#[bench]
-fn replace_all(b: &mut Bencher) {
- let re = regex!("[cjrw]");
- let text = "abcdefghijklmnopqrstuvwxyz";
- // FIXME: This isn't using the $name expand stuff.
- // It's possible RE2/Go is using it, but currently, the expand in this
- // crate is actually compiling a regex, so it's incredibly slow.
- b.iter(|| re.replace_all(text, NoExpand("")));
-}
-
-#[bench]
-fn anchored_literal_short_non_match(b: &mut Bencher) {
- let re = regex!("^zbc(d|e)");
- let text = "abcdefghijklmnopqrstuvwxyz";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn anchored_literal_long_non_match(b: &mut Bencher) {
- let re = regex!("^zbc(d|e)");
- let text = repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>();
- b.iter(|| re.is_match(text.as_slice()));
-}
-
-#[bench]
-fn anchored_literal_short_match(b: &mut Bencher) {
- let re = regex!("^.bc(d|e)");
- let text = "abcdefghijklmnopqrstuvwxyz";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn anchored_literal_long_match(b: &mut Bencher) {
- let re = regex!("^.bc(d|e)");
- let text = repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>();
- b.iter(|| re.is_match(text.as_slice()));
-}
-
-#[bench]
-fn one_pass_short_a(b: &mut Bencher) {
- let re = regex!("^.bc(d|e)*$");
- let text = "abcddddddeeeededd";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn one_pass_short_a_not(b: &mut Bencher) {
- let re = regex!(".bc(d|e)*$");
- let text = "abcddddddeeeededd";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn one_pass_short_b(b: &mut Bencher) {
- let re = regex!("^.bc(?:d|e)*$");
- let text = "abcddddddeeeededd";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn one_pass_short_b_not(b: &mut Bencher) {
- let re = regex!(".bc(?:d|e)*$");
- let text = "abcddddddeeeededd";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn one_pass_long_prefix(b: &mut Bencher) {
- let re = regex!("^abcdefghijklmnopqrstuvwxyz.*$");
- let text = "abcdefghijklmnopqrstuvwxyz";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn one_pass_long_prefix_not(b: &mut Bencher) {
- let re = regex!("^.bcdefghijklmnopqrstuvwxyz.*$");
- let text = "abcdefghijklmnopqrstuvwxyz";
- b.iter(|| re.is_match(text));
-}
-
-macro_rules! throughput {
- ($name:ident, $regex:expr, $size:expr) => (
- #[bench]
- fn $name(b: &mut Bencher) {
- let text = gen_text($size);
- b.bytes = $size;
- b.iter(|| if $regex.is_match(text.as_slice()) { panic!("match") });
- }
- );
-}
-
-fn easy0() -> Regex { regex!("ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
-fn easy1() -> Regex { regex!("A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$") }
-fn medium() -> Regex { regex!("[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
-fn hard() -> Regex { regex!("[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
-
-fn gen_text(n: uint) -> String {
- let mut rng = thread_rng();
- let mut bytes = rng.gen_ascii_chars().map(|n| n as u8).take(n)
- .collect::<Vec<u8>>();
- for (i, b) in bytes.iter_mut().enumerate() {
- if i % 20 == 0 {
- *b = b'\n'
- }
- }
- String::from_utf8(bytes).unwrap()
-}
-
-throughput!{easy0_32, easy0(), 32}
-throughput!{easy0_1K, easy0(), 1<<10}
-throughput!{easy0_32K, easy0(), 32<<10}
-
-throughput!{easy1_32, easy1(), 32}
-throughput!{easy1_1K, easy1(), 1<<10}
-throughput!{easy1_32K, easy1(), 32<<10}
-
-throughput!{medium_32, medium(), 32}
-throughput!{medium_1K, medium(), 1<<10}
-throughput!{medium_32K,medium(), 32<<10}
-
-throughput!{hard_32, hard(), 32}
-throughput!{hard_1K, hard(), 1<<10}
-throughput!{hard_32K,hard(), 32<<10}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// ignore-tidy-linelength
-
-// DO NOT EDIT. Automatically generated by 'src/etc/regex-match-tests'
-// on 2014-04-23 01:33:36.539280.
-
-// Tests from basic.dat
-mat!{match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18))}
-mat!{match_basic_4, r"a...b", r"abababbb", Some((2, 7))}
-mat!{match_basic_5, r"XXXXXX", r"..XXXXXX", Some((2, 8))}
-mat!{match_basic_6, r"\)", r"()", Some((1, 2))}
-mat!{match_basic_7, r"a]", r"a]a", Some((0, 2))}
-mat!{match_basic_9, r"\}", r"}", Some((0, 1))}
-mat!{match_basic_10, r"\]", r"]", Some((0, 1))}
-mat!{match_basic_12, r"]", r"]", Some((0, 1))}
-mat!{match_basic_15, r"^a", r"ax", Some((0, 1))}
-mat!{match_basic_16, r"\^a", r"a^a", Some((1, 3))}
-mat!{match_basic_17, r"a\^", r"a^", Some((0, 2))}
-mat!{match_basic_18, r"a$", r"aa", Some((1, 2))}
-mat!{match_basic_19, r"a\$", r"a$", Some((0, 2))}
-mat!{match_basic_20, r"^$", r"", Some((0, 0))}
-mat!{match_basic_21, r"$^", r"", Some((0, 0))}
-mat!{match_basic_22, r"a($)", r"aa", Some((1, 2)), Some((2, 2))}
-mat!{match_basic_23, r"a*(^a)", r"aa", Some((0, 1)), Some((0, 1))}
-mat!{match_basic_24, r"(..)*(...)*", r"a", Some((0, 0))}
-mat!{match_basic_25, r"(..)*(...)*", r"abcd", Some((0, 4)), Some((2, 4))}
-mat!{match_basic_26, r"(ab|a)(bc|c)", r"abc", Some((0, 3)), Some((0, 2)), Some((2, 3))}
-mat!{match_basic_27, r"(ab)c|abc", r"abc", Some((0, 3)), Some((0, 2))}
-mat!{match_basic_28, r"a{0}b", r"ab", Some((1, 2))}
-mat!{match_basic_29, r"(a*)(b?)(b+)b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7))}
-mat!{match_basic_30, r"(a*)(b{0,1})(b{1,})b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7))}
-mat!{match_basic_32, r"((a|a)|a)", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1))}
-mat!{match_basic_33, r"(a*)(a|aa)", r"aaaa", Some((0, 4)), Some((0, 3)), Some((3, 4))}
-mat!{match_basic_34, r"a*(a.|aa)", r"aaaa", Some((0, 4)), Some((2, 4))}
-mat!{match_basic_35, r"a(b)|c(d)|a(e)f", r"aef", Some((0, 3)), None, None, Some((1, 2))}
-mat!{match_basic_36, r"(a|b)?.*", r"b", Some((0, 1)), Some((0, 1))}
-mat!{match_basic_37, r"(a|b)c|a(b|c)", r"ac", Some((0, 2)), Some((0, 1))}
-mat!{match_basic_38, r"(a|b)c|a(b|c)", r"ab", Some((0, 2)), None, Some((1, 2))}
-mat!{match_basic_39, r"(a|b)*c|(a|ab)*c", r"abc", Some((0, 3)), Some((1, 2))}
-mat!{match_basic_40, r"(a|b)*c|(a|ab)*c", r"xc", Some((1, 2))}
-mat!{match_basic_41, r"(.a|.b).*|.*(.a|.b)", r"xa", Some((0, 2)), Some((0, 2))}
-mat!{match_basic_42, r"a?(ab|ba)ab", r"abab", Some((0, 4)), Some((0, 2))}
-mat!{match_basic_43, r"a?(ac{0}b|ba)ab", r"abab", Some((0, 4)), Some((0, 2))}
-mat!{match_basic_44, r"ab|abab", r"abbabab", Some((0, 2))}
-mat!{match_basic_45, r"aba|bab|bba", r"baaabbbaba", Some((5, 8))}
-mat!{match_basic_46, r"aba|bab", r"baaabbbaba", Some((6, 9))}
-mat!{match_basic_47, r"(aa|aaa)*|(a|aaaaa)", r"aa", Some((0, 2)), Some((0, 2))}
-mat!{match_basic_48, r"(a.|.a.)*|(a|.a...)", r"aa", Some((0, 2)), Some((0, 2))}
-mat!{match_basic_49, r"ab|a", r"xabc", Some((1, 3))}
-mat!{match_basic_50, r"ab|a", r"xxabc", Some((2, 4))}
-mat!{match_basic_51, r"(?i)(Ab|cD)*", r"aBcD", Some((0, 4)), Some((2, 4))}
-mat!{match_basic_52, r"[^-]", r"--a", Some((2, 3))}
-mat!{match_basic_53, r"[a-]*", r"--a", Some((0, 3))}
-mat!{match_basic_54, r"[a-m-]*", r"--amoma--", Some((0, 4))}
-mat!{match_basic_55, r":::1:::0:|:::1:1:0:", r":::0:::1:::1:::0:", Some((8, 17))}
-mat!{match_basic_56, r":::1:::0:|:::1:1:1:", r":::0:::1:::1:::0:", Some((8, 17))}
-mat!{match_basic_57, r"[[:upper:]]", r"A", Some((0, 1))}
-mat!{match_basic_58, r"[[:lower:]]+", r"`az{", Some((1, 3))}
-mat!{match_basic_59, r"[[:upper:]]+", r"@AZ[", Some((1, 3))}
-mat!{match_basic_65, r"
-", r"
-", Some((0, 1))}
-mat!{match_basic_66, r"
-", r"
-", Some((0, 1))}
-mat!{match_basic_67, r"[^a]", r"
-", Some((0, 1))}
-mat!{match_basic_68, r"
-a", r"
-a", Some((0, 2))}
-mat!{match_basic_69, r"(a)(b)(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((2, 3))}
-mat!{match_basic_70, r"xxx", r"xxx", Some((0, 3))}
-mat!{match_basic_71, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 6,", Some((0, 6))}
-mat!{match_basic_72, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"2/7", Some((0, 3))}
-mat!{match_basic_73, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 1,Feb 6", Some((5, 11))}
-mat!{match_basic_74, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", r"x", Some((0, 1)), Some((0, 1)), Some((0, 1))}
-mat!{match_basic_75, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", r"xx", Some((0, 2)), Some((1, 2)), Some((1, 2))}
-mat!{match_basic_76, r"a?(ab|ba)*", r"ababababababababababababababababababababababababababababababababababababababababa", Some((0, 81)), Some((79, 81))}
-mat!{match_basic_77, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabbbbaa", Some((18, 25))}
-mat!{match_basic_78, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabaa", Some((18, 22))}
-mat!{match_basic_79, r"aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", r"baaabbbabac", Some((7, 11))}
-mat!{match_basic_80, r".*", r"\ 1\7f", Some((0, 2))}
-mat!{match_basic_81, r"aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", r"XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", Some((53, 57))}
-mat!{match_basic_83, r"a*a*a*a*a*b", r"aaaaaaaaab", Some((0, 10))}
-mat!{match_basic_84, r"^", r"", Some((0, 0))}
-mat!{match_basic_85, r"$", r"", Some((0, 0))}
-mat!{match_basic_86, r"^$", r"", Some((0, 0))}
-mat!{match_basic_87, r"^a$", r"a", Some((0, 1))}
-mat!{match_basic_88, r"abc", r"abc", Some((0, 3))}
-mat!{match_basic_89, r"abc", r"xabcy", Some((1, 4))}
-mat!{match_basic_90, r"abc", r"ababc", Some((2, 5))}
-mat!{match_basic_91, r"ab*c", r"abc", Some((0, 3))}
-mat!{match_basic_92, r"ab*bc", r"abc", Some((0, 3))}
-mat!{match_basic_93, r"ab*bc", r"abbc", Some((0, 4))}
-mat!{match_basic_94, r"ab*bc", r"abbbbc", Some((0, 6))}
-mat!{match_basic_95, r"ab+bc", r"abbc", Some((0, 4))}
-mat!{match_basic_96, r"ab+bc", r"abbbbc", Some((0, 6))}
-mat!{match_basic_97, r"ab?bc", r"abbc", Some((0, 4))}
-mat!{match_basic_98, r"ab?bc", r"abc", Some((0, 3))}
-mat!{match_basic_99, r"ab?c", r"abc", Some((0, 3))}
-mat!{match_basic_100, r"^abc$", r"abc", Some((0, 3))}
-mat!{match_basic_101, r"^abc", r"abcc", Some((0, 3))}
-mat!{match_basic_102, r"abc$", r"aabc", Some((1, 4))}
-mat!{match_basic_103, r"^", r"abc", Some((0, 0))}
-mat!{match_basic_104, r"$", r"abc", Some((3, 3))}
-mat!{match_basic_105, r"a.c", r"abc", Some((0, 3))}
-mat!{match_basic_106, r"a.c", r"axc", Some((0, 3))}
-mat!{match_basic_107, r"a.*c", r"axyzc", Some((0, 5))}
-mat!{match_basic_108, r"a[bc]d", r"abd", Some((0, 3))}
-mat!{match_basic_109, r"a[b-d]e", r"ace", Some((0, 3))}
-mat!{match_basic_110, r"a[b-d]", r"aac", Some((1, 3))}
-mat!{match_basic_111, r"a[-b]", r"a-", Some((0, 2))}
-mat!{match_basic_112, r"a[b-]", r"a-", Some((0, 2))}
-mat!{match_basic_113, r"a]", r"a]", Some((0, 2))}
-mat!{match_basic_114, r"a[]]b", r"a]b", Some((0, 3))}
-mat!{match_basic_115, r"a[^bc]d", r"aed", Some((0, 3))}
-mat!{match_basic_116, r"a[^-b]c", r"adc", Some((0, 3))}
-mat!{match_basic_117, r"a[^]b]c", r"adc", Some((0, 3))}
-mat!{match_basic_118, r"ab|cd", r"abc", Some((0, 2))}
-mat!{match_basic_119, r"ab|cd", r"abcd", Some((0, 2))}
-mat!{match_basic_120, r"a\(b", r"a(b", Some((0, 3))}
-mat!{match_basic_121, r"a\(*b", r"ab", Some((0, 2))}
-mat!{match_basic_122, r"a\(*b", r"a((b", Some((0, 4))}
-mat!{match_basic_123, r"((a))", r"abc", Some((0, 1)), Some((0, 1)), Some((0, 1))}
-mat!{match_basic_124, r"(a)b(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((2, 3))}
-mat!{match_basic_125, r"a+b+c", r"aabbabc", Some((4, 7))}
-mat!{match_basic_126, r"a*", r"aaa", Some((0, 3))}
-mat!{match_basic_128, r"(a*)*", r"-", Some((0, 0)), None}
-mat!{match_basic_129, r"(a*)+", r"-", Some((0, 0)), Some((0, 0))}
-mat!{match_basic_131, r"(a*|b)*", r"-", Some((0, 0)), None}
-mat!{match_basic_132, r"(a+|b)*", r"ab", Some((0, 2)), Some((1, 2))}
-mat!{match_basic_133, r"(a+|b)+", r"ab", Some((0, 2)), Some((1, 2))}
-mat!{match_basic_134, r"(a+|b)?", r"ab", Some((0, 1)), Some((0, 1))}
-mat!{match_basic_135, r"[^ab]*", r"cde", Some((0, 3))}
-mat!{match_basic_137, r"(^)*", r"-", Some((0, 0)), None}
-mat!{match_basic_138, r"a*", r"", Some((0, 0))}
-mat!{match_basic_139, r"([abc])*d", r"abbbcd", Some((0, 6)), Some((4, 5))}
-mat!{match_basic_140, r"([abc])*bcd", r"abcd", Some((0, 4)), Some((0, 1))}
-mat!{match_basic_141, r"a|b|c|d|e", r"e", Some((0, 1))}
-mat!{match_basic_142, r"(a|b|c|d|e)f", r"ef", Some((0, 2)), Some((0, 1))}
-mat!{match_basic_144, r"((a*|b))*", r"-", Some((0, 0)), None, None}
-mat!{match_basic_145, r"abcd*efg", r"abcdefg", Some((0, 7))}
-mat!{match_basic_146, r"ab*", r"xabyabbbz", Some((1, 3))}
-mat!{match_basic_147, r"ab*", r"xayabbbz", Some((1, 2))}
-mat!{match_basic_148, r"(ab|cd)e", r"abcde", Some((2, 5)), Some((2, 4))}
-mat!{match_basic_149, r"[abhgefdc]ij", r"hij", Some((0, 3))}
-mat!{match_basic_150, r"(a|b)c*d", r"abcd", Some((1, 4)), Some((1, 2))}
-mat!{match_basic_151, r"(ab|ab*)bc", r"abc", Some((0, 3)), Some((0, 1))}
-mat!{match_basic_152, r"a([bc]*)c*", r"abc", Some((0, 3)), Some((1, 3))}
-mat!{match_basic_153, r"a([bc]*)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4))}
-mat!{match_basic_154, r"a([bc]+)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4))}
-mat!{match_basic_155, r"a([bc]*)(c+d)", r"abcd", Some((0, 4)), Some((1, 2)), Some((2, 4))}
-mat!{match_basic_156, r"a[bcd]*dcdcde", r"adcdcde", Some((0, 7))}
-mat!{match_basic_157, r"(ab|a)b*c", r"abc", Some((0, 3)), Some((0, 2))}
-mat!{match_basic_158, r"((a)(b)c)(d)", r"abcd", Some((0, 4)), Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((3, 4))}
-mat!{match_basic_159, r"[A-Za-z_][A-Za-z0-9_]*", r"alpha", Some((0, 5))}
-mat!{match_basic_160, r"^a(bc+|b[eh])g|.h$", r"abh", Some((1, 3))}
-mat!{match_basic_161, r"(bc+d$|ef*g.|h?i(j|k))", r"effgz", Some((0, 5)), Some((0, 5))}
-mat!{match_basic_162, r"(bc+d$|ef*g.|h?i(j|k))", r"ij", Some((0, 2)), Some((0, 2)), Some((1, 2))}
-mat!{match_basic_163, r"(bc+d$|ef*g.|h?i(j|k))", r"reffgz", Some((1, 6)), Some((1, 6))}
-mat!{match_basic_164, r"(((((((((a)))))))))", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1))}
-mat!{match_basic_165, r"multiple words", r"multiple words yeah", Some((0, 14))}
-mat!{match_basic_166, r"(.*)c(.*)", r"abcde", Some((0, 5)), Some((0, 2)), Some((3, 5))}
-mat!{match_basic_167, r"abcd", r"abcd", Some((0, 4))}
-mat!{match_basic_168, r"a(bc)d", r"abcd", Some((0, 4)), Some((1, 3))}
-mat!{match_basic_169, r"a[\ 1-\ 3]?c", r"a\ 2c", Some((0, 3))}
-mat!{match_basic_170, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qaddafi", Some((0, 15)), None, Some((10, 12))}
-mat!{match_basic_171, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mo'ammar Gadhafi", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_172, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Kaddafi", Some((0, 15)), None, Some((10, 12))}
-mat!{match_basic_173, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qadhafi", Some((0, 15)), None, Some((10, 12))}
-mat!{match_basic_174, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gadafi", Some((0, 14)), None, Some((10, 11))}
-mat!{match_basic_175, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadafi", Some((0, 15)), None, Some((11, 12))}
-mat!{match_basic_176, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moamar Gaddafi", Some((0, 14)), None, Some((9, 11))}
-mat!{match_basic_177, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadhdhafi", Some((0, 18)), None, Some((13, 15))}
-mat!{match_basic_178, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Khaddafi", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_179, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafy", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_180, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghadafi", Some((0, 15)), None, Some((11, 12))}
-mat!{match_basic_181, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafi", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_182, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muamar Kaddafi", Some((0, 14)), None, Some((9, 11))}
-mat!{match_basic_183, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Quathafi", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_184, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gheddafi", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_185, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Khadafy", Some((0, 15)), None, Some((11, 12))}
-mat!{match_basic_186, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Qudhafi", Some((0, 15)), None, Some((10, 12))}
-mat!{match_basic_187, r"a+(b|c)*d+", r"aabcdd", Some((0, 6)), Some((3, 4))}
-mat!{match_basic_188, r"^.+$", r"vivi", Some((0, 4))}
-mat!{match_basic_189, r"^(.+)$", r"vivi", Some((0, 4)), Some((0, 4))}
-mat!{match_basic_190, r"^([^!.]+).att.com!(.+)$", r"gryphon.att.com!eby", Some((0, 19)), Some((0, 7)), Some((16, 19))}
-mat!{match_basic_191, r"^([^!]+!)?([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3))}
-mat!{match_basic_192, r"^([^!]+!)?([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_193, r"^([^!]+!)?([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_194, r"^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), Some((4, 8)), Some((8, 11))}
-mat!{match_basic_195, r"((foo)|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), None, Some((0, 3))}
-mat!{match_basic_196, r"((foo)|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), None, Some((4, 7))}
-mat!{match_basic_197, r"((foo)|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))}
-mat!{match_basic_198, r"((foo)|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3))}
-mat!{match_basic_199, r"((foo)|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7))}
-mat!{match_basic_200, r"((foo)|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))}
-mat!{match_basic_201, r"(foo|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))}
-mat!{match_basic_202, r"(foo|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), Some((4, 7))}
-mat!{match_basic_203, r"(foo|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3))}
-mat!{match_basic_204, r"(foo|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3))}
-mat!{match_basic_205, r"(foo|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7))}
-mat!{match_basic_206, r"(foo|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3))}
-mat!{match_basic_207, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))}
-mat!{match_basic_208, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3))}
-mat!{match_basic_209, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_210, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))}
-mat!{match_basic_211, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_212, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bas", Some((0, 3)), Some((0, 3)), None, Some((0, 3))}
-mat!{match_basic_213, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bar!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_214, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))}
-mat!{match_basic_215, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_216, r".*(/XXX).*", r"/XXX", Some((0, 4)), Some((0, 4))}
-mat!{match_basic_217, r".*(\\XXX).*", r"\XXX", Some((0, 4)), Some((0, 4))}
-mat!{match_basic_218, r"\\XXX", r"\XXX", Some((0, 4))}
-mat!{match_basic_219, r".*(/000).*", r"/000", Some((0, 4)), Some((0, 4))}
-mat!{match_basic_220, r".*(\\000).*", r"\000", Some((0, 4)), Some((0, 4))}
-mat!{match_basic_221, r"\\000", r"\000", Some((0, 4))}
-
-// Tests from nullsubexpr.dat
-mat!{match_nullsubexpr_3, r"(a*)*", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_5, r"(a*)*", r"x", Some((0, 0)), None}
-mat!{match_nullsubexpr_6, r"(a*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_7, r"(a*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_8, r"(a*)+", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_9, r"(a*)+", r"x", Some((0, 0)), Some((0, 0))}
-mat!{match_nullsubexpr_10, r"(a*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_11, r"(a*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_12, r"(a+)*", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_13, r"(a+)*", r"x", Some((0, 0))}
-mat!{match_nullsubexpr_14, r"(a+)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_15, r"(a+)*", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_16, r"(a+)+", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_17, r"(a+)+", r"x", None}
-mat!{match_nullsubexpr_18, r"(a+)+", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_19, r"(a+)+", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_21, r"([a]*)*", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_23, r"([a]*)*", r"x", Some((0, 0)), None}
-mat!{match_nullsubexpr_24, r"([a]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_25, r"([a]*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_26, r"([a]*)+", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_27, r"([a]*)+", r"x", Some((0, 0)), Some((0, 0))}
-mat!{match_nullsubexpr_28, r"([a]*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_29, r"([a]*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_30, r"([^b]*)*", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_32, r"([^b]*)*", r"b", Some((0, 0)), None}
-mat!{match_nullsubexpr_33, r"([^b]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_34, r"([^b]*)*", r"aaaaaab", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_35, r"([ab]*)*", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_36, r"([ab]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_37, r"([ab]*)*", r"ababab", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_38, r"([ab]*)*", r"bababa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_39, r"([ab]*)*", r"b", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_40, r"([ab]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_41, r"([ab]*)*", r"aaaabcde", Some((0, 5)), Some((0, 5))}
-mat!{match_nullsubexpr_42, r"([^a]*)*", r"b", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_43, r"([^a]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_45, r"([^a]*)*", r"aaaaaa", Some((0, 0)), None}
-mat!{match_nullsubexpr_46, r"([^ab]*)*", r"ccccxx", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_48, r"([^ab]*)*", r"ababab", Some((0, 0)), None}
-mat!{match_nullsubexpr_50, r"((z)+|a)*", r"zabcde", Some((0, 2)), Some((1, 2))}
-mat!{match_nullsubexpr_69, r"(a*)*(x)", r"x", Some((0, 1)), None, Some((0, 1))}
-mat!{match_nullsubexpr_70, r"(a*)*(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2))}
-mat!{match_nullsubexpr_71, r"(a*)*(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2))}
-mat!{match_nullsubexpr_73, r"(a*)+(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1))}
-mat!{match_nullsubexpr_74, r"(a*)+(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2))}
-mat!{match_nullsubexpr_75, r"(a*)+(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2))}
-mat!{match_nullsubexpr_77, r"(a*){2}(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1))}
-mat!{match_nullsubexpr_78, r"(a*){2}(x)", r"ax", Some((0, 2)), Some((1, 1)), Some((1, 2))}
-mat!{match_nullsubexpr_79, r"(a*){2}(x)", r"axa", Some((0, 2)), Some((1, 1)), Some((1, 2))}
-
-// Tests from repetition.dat
-mat!{match_repetition_10, r"((..)|(.))", r"", None}
-mat!{match_repetition_11, r"((..)|(.))((..)|(.))", r"", None}
-mat!{match_repetition_12, r"((..)|(.))((..)|(.))((..)|(.))", r"", None}
-mat!{match_repetition_14, r"((..)|(.)){1}", r"", None}
-mat!{match_repetition_15, r"((..)|(.)){2}", r"", None}
-mat!{match_repetition_16, r"((..)|(.)){3}", r"", None}
-mat!{match_repetition_18, r"((..)|(.))*", r"", Some((0, 0))}
-mat!{match_repetition_20, r"((..)|(.))", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))}
-mat!{match_repetition_21, r"((..)|(.))((..)|(.))", r"a", None}
-mat!{match_repetition_22, r"((..)|(.))((..)|(.))((..)|(.))", r"a", None}
-mat!{match_repetition_24, r"((..)|(.)){1}", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))}
-mat!{match_repetition_25, r"((..)|(.)){2}", r"a", None}
-mat!{match_repetition_26, r"((..)|(.)){3}", r"a", None}
-mat!{match_repetition_28, r"((..)|(.))*", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))}
-mat!{match_repetition_30, r"((..)|(.))", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_31, r"((..)|(.))((..)|(.))", r"aa", Some((0, 2)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2))}
-mat!{match_repetition_32, r"((..)|(.))((..)|(.))((..)|(.))", r"aa", None}
-mat!{match_repetition_34, r"((..)|(.)){1}", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_35, r"((..)|(.)){2}", r"aa", Some((0, 2)), Some((1, 2)), None, Some((1, 2))}
-mat!{match_repetition_36, r"((..)|(.)){3}", r"aa", None}
-mat!{match_repetition_38, r"((..)|(.))*", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_40, r"((..)|(.))", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_41, r"((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3))}
-mat!{match_repetition_42, r"((..)|(.))((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2)), Some((2, 3)), None, Some((2, 3))}
-mat!{match_repetition_44, r"((..)|(.)){1}", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_46, r"((..)|(.)){2}", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3))}
-mat!{match_repetition_47, r"((..)|(.)){3}", r"aaa", Some((0, 3)), Some((2, 3)), None, Some((2, 3))}
-mat!{match_repetition_50, r"((..)|(.))*", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3))}
-mat!{match_repetition_52, r"((..)|(.))", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_53, r"((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_54, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3)), Some((3, 4)), None, Some((3, 4))}
-mat!{match_repetition_56, r"((..)|(.)){1}", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_57, r"((..)|(.)){2}", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_59, r"((..)|(.)){3}", r"aaaa", Some((0, 4)), Some((3, 4)), Some((0, 2)), Some((3, 4))}
-mat!{match_repetition_61, r"((..)|(.))*", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_63, r"((..)|(.))", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_64, r"((..)|(.))((..)|(.))", r"aaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_65, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaa", Some((0, 5)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 5)), None, Some((4, 5))}
-mat!{match_repetition_67, r"((..)|(.)){1}", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_68, r"((..)|(.)){2}", r"aaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_70, r"((..)|(.)){3}", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5))}
-mat!{match_repetition_73, r"((..)|(.))*", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5))}
-mat!{match_repetition_75, r"((..)|(.))", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_76, r"((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_77, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 6)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 6)), Some((4, 6)), None}
-mat!{match_repetition_79, r"((..)|(.)){1}", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_80, r"((..)|(.)){2}", r"aaaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_81, r"((..)|(.)){3}", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None}
-mat!{match_repetition_83, r"((..)|(.))*", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None}
-mat!{match_repetition_90, r"X(.?){0,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_91, r"X(.?){1,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_92, r"X(.?){2,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_93, r"X(.?){3,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_94, r"X(.?){4,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_95, r"X(.?){5,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_96, r"X(.?){6,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_97, r"X(.?){7,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_98, r"X(.?){8,}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_100, r"X(.?){0,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_102, r"X(.?){1,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_104, r"X(.?){2,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_106, r"X(.?){3,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_108, r"X(.?){4,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_110, r"X(.?){5,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_112, r"X(.?){6,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_114, r"X(.?){7,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_115, r"X(.?){8,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_126, r"(a|ab|c|bcd){0,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_127, r"(a|ab|c|bcd){1,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_128, r"(a|ab|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
-mat!{match_repetition_129, r"(a|ab|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
-mat!{match_repetition_130, r"(a|ab|c|bcd){4,}(d*)", r"ababcd", None}
-mat!{match_repetition_131, r"(a|ab|c|bcd){0,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_132, r"(a|ab|c|bcd){1,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_133, r"(a|ab|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
-mat!{match_repetition_134, r"(a|ab|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
-mat!{match_repetition_135, r"(a|ab|c|bcd){4,10}(d*)", r"ababcd", None}
-mat!{match_repetition_136, r"(a|ab|c|bcd)*(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_137, r"(a|ab|c|bcd)+(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_143, r"(ab|a|c|bcd){0,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_145, r"(ab|a|c|bcd){1,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_147, r"(ab|a|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_149, r"(ab|a|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_150, r"(ab|a|c|bcd){4,}(d*)", r"ababcd", None}
-mat!{match_repetition_152, r"(ab|a|c|bcd){0,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_154, r"(ab|a|c|bcd){1,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_156, r"(ab|a|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_158, r"(ab|a|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_159, r"(ab|a|c|bcd){4,10}(d*)", r"ababcd", None}
-mat!{match_repetition_161, r"(ab|a|c|bcd)*(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_163, r"(ab|a|c|bcd)+(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-macro_rules! regex {
- ($re:expr) => (
- match ::regex::Regex::new($re) {
- Ok(re) => re,
- Err(err) => panic!("{:?}", err),
- }
- );
-}
-
-#[path = "bench.rs"]
-mod dynamic_bench;
-#[path = "tests.rs"]
-mod dynamic_tests;
-
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-use regex::Regex;
-static RE: Regex = regex!(r"\d+");
-
-#[test]
-fn static_splitn() {
- let text = "cauchy123plato456tyler789binx";
- let subs: Vec<&str> = RE.splitn(text, 2).collect();
- assert_eq!(subs, vec!("cauchy", "plato456tyler789binx"));
-}
-
-#[test]
-fn static_split() {
- let text = "cauchy123plato456tyler789binx";
- let subs: Vec<&str> = RE.split(text).collect();
- assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx"));
-}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// ignore-tidy-linelength
-// ignore-lexer-test FIXME #15679
-
-use regex::{Regex, NoExpand};
-
-#[test]
-fn splitn() {
- let re = regex!(r"\d+");
- let text = "cauchy123plato456tyler789binx";
- let subs: Vec<&str> = re.splitn(text, 2).collect();
- assert_eq!(subs, vec!("cauchy", "plato456tyler789binx"));
-}
-
-#[test]
-fn split() {
- let re = regex!(r"\d+");
- let text = "cauchy123plato456tyler789binx";
- let subs: Vec<&str> = re.split(text).collect();
- assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx"));
-}
-
-#[test]
-fn empty_regex_empty_match() {
- let re = regex!("");
- let ms = re.find_iter("").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 0)]);
-}
-
-#[test]
-fn empty_regex_nonempty_match() {
- let re = regex!("");
- let ms = re.find_iter("abc").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
-}
-
-#[test]
-fn quoted_bracket_set() {
- let re = regex!(r"([\x{5b}\x{5d}])");
- let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 1), (1, 2)]);
- let re = regex!(r"([\[\]])");
- let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 1), (1, 2)]);
-}
-
-#[test]
-fn first_range_starts_with_left_bracket() {
- let re = regex!(r"([[-z])");
- let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 1), (1, 2)]);
-}
-
-#[test]
-fn range_ends_with_escape() {
- let re = regex!(r"([\[-\x{5d}])");
- let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 1), (1, 2)]);
-}
-
-macro_rules! replace {
- ($name:ident, $which:ident, $re:expr,
- $search:expr, $replace:expr, $result:expr) => (
- #[test]
- fn $name() {
- let re = regex!($re);
- assert_eq!(re.$which($search, $replace), String::from_str($result));
- }
- );
-}
-
-replace!{rep_first, replace, r"\d", "age: 26", "Z", "age: Z6"}
-replace!{rep_plus, replace, r"\d+", "age: 26", "Z", "age: Z"}
-replace!{rep_all, replace_all, r"\d", "age: 26", "Z", "age: ZZ"}
-replace!{rep_groups, replace, r"(\S+)\s+(\S+)", "w1 w2", "$2 $1", "w2 w1"}
-replace!{rep_double_dollar, replace,
- r"(\S+)\s+(\S+)", "w1 w2", "$2 $$1", "w2 $1"}
-replace!{rep_no_expand, replace,
- r"(\S+)\s+(\S+)", "w1 w2", NoExpand("$2 $1"), "$2 $1"}
-replace!{rep_named, replace_all,
- r"(?P<first>\S+)\s+(?P<last>\S+)(?P<space>\s*)",
- "w1 w2 w3 w4", "$last $first$space", "w2 w1 w4 w3"}
-replace!{rep_trim, replace_all, "^[ \t]+|[ \t]+$", " \t trim me\t \t",
- "", "trim me"}
-
-macro_rules! noparse {
- ($name:ident, $re:expr) => (
- #[test]
- fn $name() {
- let re = $re;
- match Regex::new(re) {
- Err(_) => {},
- Ok(_) => panic!("Regex '{}' should cause a parse error.", re),
- }
- }
- );
-}
-
-noparse!{fail_double_repeat, "a**"}
-noparse!{fail_no_repeat_arg, "*"}
-noparse!{fail_no_repeat_arg_begin, "^*"}
-noparse!{fail_incomplete_escape, "\\"}
-noparse!{fail_class_incomplete, "[A-"}
-noparse!{fail_class_not_closed, "[A"}
-noparse!{fail_class_no_begin, r"[\A]"}
-noparse!{fail_class_no_end, r"[\z]"}
-noparse!{fail_class_no_boundary, r"[\b]"}
-noparse!{fail_open_paren, "("}
-noparse!{fail_close_paren, ")"}
-noparse!{fail_invalid_range, "[a-Z]"}
-noparse!{fail_empty_capture_name, "(?P<>a)"}
-noparse!{fail_empty_capture_exp, "(?P<name>)"}
-noparse!{fail_bad_capture_name, "(?P<na-me>)"}
-noparse!{fail_bad_flag, "(?a)a"}
-noparse!{fail_empty_alt_before, "|a"}
-noparse!{fail_empty_alt_after, "a|"}
-noparse!{fail_counted_big_exact, "a{1001}"}
-noparse!{fail_counted_big_min, "a{1001,}"}
-noparse!{fail_counted_no_close, "a{1001"}
-noparse!{fail_unfinished_cap, "(?"}
-noparse!{fail_unfinished_escape, "\\"}
-noparse!{fail_octal_digit, r"\8"}
-noparse!{fail_hex_digit, r"\xG0"}
-noparse!{fail_hex_short, r"\xF"}
-noparse!{fail_hex_long_digits, r"\x{fffg}"}
-noparse!{fail_flag_bad, "(?a)"}
-noparse!{fail_flag_empty, "(?)"}
-noparse!{fail_double_neg, "(?-i-i)"}
-noparse!{fail_neg_empty, "(?i-)"}
-noparse!{fail_empty_group, "()"}
-noparse!{fail_dupe_named, "(?P<a>.)(?P<a>.)"}
-noparse!{fail_range_end_no_class, "[a-[:lower:]]"}
-noparse!{fail_range_end_no_begin, r"[a-\A]"}
-noparse!{fail_range_end_no_end, r"[a-\z]"}
-noparse!{fail_range_end_no_boundary, r"[a-\b]"}
-noparse!{fail_repeat_no_expr, r"-|+"}
-
-macro_rules! mat {
- ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
- #[test]
- fn $name() {
- let text = $text;
- let expected: Vec<Option<(uint, uint)>> = vec!($($loc)+);
- let r = regex!($re);
- let got = match r.captures(text) {
- Some(c) => c.iter_pos().collect::<Vec<Option<(uint, uint)>>>(),
- None => vec!(None),
- };
- // The test set sometimes leave out capture groups, so truncate
- // actual capture groups to match test set.
- let mut sgot = got.as_slice();
- if sgot.len() > expected.len() {
- sgot = &sgot[..expected.len()]
- }
- if expected != sgot {
- panic!("For RE '{}' against '{}', expected '{:?}' but got '{:?}'",
- $re, text, expected, sgot);
- }
- }
- );
-}
-
-// Some crazy expressions from regular-expressions.info.
-mat!{match_ranges,
- r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
- "num: 255", Some((5, 8))}
-mat!{match_ranges_not,
- r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
- "num: 256", None}
-mat!{match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))}
-mat!{match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))}
-mat!{match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))}
-mat!{match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None}
-mat!{match_email, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
- "mine is jam.slam@gmail.com ", Some((8, 26))}
-mat!{match_email_not, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
- "mine is jam.slam@gmail ", None}
-mat!{match_email_big, r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
- "mine is jam.slam@gmail.com ", Some((8, 26))}
-mat!{match_date1,
- r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
- "1900-01-01", Some((0, 10))}
-mat!{match_date2,
- r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
- "1900-00-01", None}
-mat!{match_date3,
- r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
- "1900-13-01", None}
-
-// Exercise the flags.
-mat!{match_flag_case, "(?i)abc", "ABC", Some((0, 3))}
-mat!{match_flag_weird_case, "(?i)a(?-i)bc", "Abc", Some((0, 3))}
-mat!{match_flag_weird_case_not, "(?i)a(?-i)bc", "ABC", None}
-mat!{match_flag_case_dotnl, "(?is)a.", "A\n", Some((0, 2))}
-mat!{match_flag_case_dotnl_toggle, "(?is)a.(?-is)a.", "A\nab", Some((0, 4))}
-mat!{match_flag_case_dotnl_toggle_not, "(?is)a.(?-is)a.", "A\na\n", None}
-mat!{match_flag_case_dotnl_toggle_ok, "(?is)a.(?-is:a.)?", "A\na\n", Some((0, 2))}
-mat!{match_flag_multi, "(?m)(?:^\\d+$\n?)+", "123\n456\n789", Some((0, 11))}
-mat!{match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1))}
-mat!{match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2))}
-mat!{match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2))}
-
-// Some Unicode tests.
-// A couple of these are commented out because something in the guts of macro expansion is creating
-// invalid byte strings.
-//mat!{uni_literal, r"Ⅰ", "Ⅰ", Some((0, 3))}
-mat!{uni_one, r"\pN", "Ⅰ", Some((0, 3))}
-mat!{uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8))}
-mat!{uni_not, r"\PN+", "abⅠ", Some((0, 2))}
-mat!{uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2))}
-mat!{uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5))}
-mat!{uni_case, r"(?i)Δ", "δ", Some((0, 2))}
-//mat!{uni_case_not, r"Δ", "δ", None}
-mat!{uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8))}
-mat!{uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10))}
-mat!{uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10))}
-mat!{uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10))}
-
-// Test the Unicode friendliness of Perl character classes.
-mat!{uni_perl_w, r"\w+", "dδd", Some((0, 4))}
-mat!{uni_perl_w_not, r"\w+", "⥡", None}
-mat!{uni_perl_w_neg, r"\W+", "⥡", Some((0, 3))}
-mat!{uni_perl_d, r"\d+", "1२३9", Some((0, 8))}
-mat!{uni_perl_d_not, r"\d+", "Ⅱ", None}
-mat!{uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3))}
-mat!{uni_perl_s, r"\s+", " ", Some((0, 3))}
-mat!{uni_perl_s_not, r"\s+", "☃", None}
-mat!{uni_perl_s_neg, r"\S+", "☃", Some((0, 3))}
-
-// And do the same for word boundaries.
-mat!{uni_boundary_none, r"\d\b", "6δ", None}
-mat!{uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1))}
-
-// A whole mess of tests from Glenn Fowler's regex test suite.
-// Generated by the 'src/etc/regex-match-tests' program.
-mod matches;
+++ /dev/null
-The following license covers testregex.c and all associated test data.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
-without restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, and/or sell copies of the
-Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following disclaimer:
-
-THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+++ /dev/null
-Test data was taken from the Go distribution, which was in turn taken from the
-testregex test suite:
-
- http://www2.research.att.com/~astopen/testregex/testregex.html
-
-The LICENSE in this directory corresponds to the LICENSE that the data was
-released under.
-
-The tests themselves were modified for RE2/Go. A couple were modified further
-by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them.
-(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
-have been a bad idea, but I think being consistent with an established Regex
-library is worth something.
-
-Note that these files are read by 'src/etc/regexp-match-tests' and turned into
-Rust tests found in 'src/libregexp/tests/matches.rs'.
-
+++ /dev/null
-NOTE all standard compliant implementations should pass these : 2002-05-31
-
-BE abracadabra$ abracadabracadabra (7,18)
-BE a...b abababbb (2,7)
-BE XXXXXX ..XXXXXX (2,8)
-E \) () (1,2)
-BE a] a]a (0,2)
-B } } (0,1)
-E \} } (0,1)
-BE \] ] (0,1)
-B ] ] (0,1)
-E ] ] (0,1)
-B { { (0,1)
-B } } (0,1)
-BE ^a ax (0,1)
-BE \^a a^a (1,3)
-BE a\^ a^ (0,2)
-BE a$ aa (1,2)
-BE a\$ a$ (0,2)
-BE ^$ NULL (0,0)
-E $^ NULL (0,0)
-E a($) aa (1,2)(2,2)
-E a*(^a) aa (0,1)(0,1)
-E (..)*(...)* a (0,0)
-E (..)*(...)* abcd (0,4)(2,4)
-E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
-E (ab)c|abc abc (0,3)(0,2)
-E a{0}b ab (1,2)
-E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
-E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
-E a{9876543210} NULL BADBR
-E ((a|a)|a) a (0,1)(0,1)(0,1)
-E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
-E a*(a.|aa) aaaa (0,4)(2,4)
-E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
-E (a|b)?.* b (0,1)(0,1)
-E (a|b)c|a(b|c) ac (0,2)(0,1)
-E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
-E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
-E (a|b)*c|(a|ab)*c xc (1,2)
-E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
-E a?(ab|ba)ab abab (0,4)(0,2)
-E a?(ac{0}b|ba)ab abab (0,4)(0,2)
-E ab|abab abbabab (0,2)
-E aba|bab|bba baaabbbaba (5,8)
-E aba|bab baaabbbaba (6,9)
-E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
-E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
-E ab|a xabc (1,3)
-E ab|a xxabc (2,4)
-Ei (Ab|cD)* aBcD (0,4)(2,4)
-BE [^-] --a (2,3)
-BE [a-]* --a (0,3)
-BE [a-m-]* --amoma-- (0,4)
-E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
-E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
-{E [[:upper:]] A (0,1) [[<element>]] not supported
-E [[:lower:]]+ `az{ (1,3)
-E [[:upper:]]+ @AZ[ (1,3)
-# No collation in Go
-#BE [[-]] [[-]] (2,4)
-#BE [[.NIL.]] NULL ECOLLATE
-#BE [[=aleph=]] NULL ECOLLATE
-}
-BE$ \n \n (0,1)
-BEn$ \n \n (0,1)
-BE$ [^a] \n (0,1)
-BE$ \na \na (0,2)
-E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
-BE xxx xxx (0,3)
-E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
-E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
-E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
-E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
-E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
-E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
-E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
-E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
-E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
-BE$ .* \x01\x7f (0,2)
-E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
-L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
-E a*a*a*a*a*b aaaaaaaaab (0,10)
-BE ^ NULL (0,0)
-BE $ NULL (0,0)
-BE ^$ NULL (0,0)
-BE ^a$ a (0,1)
-BE abc abc (0,3)
-BE abc xabcy (1,4)
-BE abc ababc (2,5)
-BE ab*c abc (0,3)
-BE ab*bc abc (0,3)
-BE ab*bc abbc (0,4)
-BE ab*bc abbbbc (0,6)
-E ab+bc abbc (0,4)
-E ab+bc abbbbc (0,6)
-E ab?bc abbc (0,4)
-E ab?bc abc (0,3)
-E ab?c abc (0,3)
-BE ^abc$ abc (0,3)
-BE ^abc abcc (0,3)
-BE abc$ aabc (1,4)
-BE ^ abc (0,0)
-BE $ abc (3,3)
-BE a.c abc (0,3)
-BE a.c axc (0,3)
-BE a.*c axyzc (0,5)
-BE a[bc]d abd (0,3)
-BE a[b-d]e ace (0,3)
-BE a[b-d] aac (1,3)
-BE a[-b] a- (0,2)
-BE a[b-] a- (0,2)
-BE a] a] (0,2)
-BE a[]]b a]b (0,3)
-BE a[^bc]d aed (0,3)
-BE a[^-b]c adc (0,3)
-BE a[^]b]c adc (0,3)
-E ab|cd abc (0,2)
-E ab|cd abcd (0,2)
-E a\(b a(b (0,3)
-E a\(*b ab (0,2)
-E a\(*b a((b (0,4)
-E ((a)) abc (0,1)(0,1)(0,1)
-E (a)b(c) abc (0,3)(0,1)(2,3)
-E a+b+c aabbabc (4,7)
-E a* aaa (0,3)
-#E (a*)* - (0,0)(0,0)
-E (a*)* - (0,0)(?,?) RE2/Go
-E (a*)+ - (0,0)(0,0)
-#E (a*|b)* - (0,0)(0,0)
-E (a*|b)* - (0,0)(?,?) RE2/Go
-E (a+|b)* ab (0,2)(1,2)
-E (a+|b)+ ab (0,2)(1,2)
-E (a+|b)? ab (0,1)(0,1)
-BE [^ab]* cde (0,3)
-#E (^)* - (0,0)(0,0)
-E (^)* - (0,0)(?,?) RE2/Go
-BE a* NULL (0,0)
-E ([abc])*d abbbcd (0,6)(4,5)
-E ([abc])*bcd abcd (0,4)(0,1)
-E a|b|c|d|e e (0,1)
-E (a|b|c|d|e)f ef (0,2)(0,1)
-#E ((a*|b))* - (0,0)(0,0)(0,0)
-E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go
-BE abcd*efg abcdefg (0,7)
-BE ab* xabyabbbz (1,3)
-BE ab* xayabbbz (1,2)
-E (ab|cd)e abcde (2,5)(2,4)
-BE [abhgefdc]ij hij (0,3)
-E (a|b)c*d abcd (1,4)(1,2)
-E (ab|ab*)bc abc (0,3)(0,1)
-E a([bc]*)c* abc (0,3)(1,3)
-E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
-E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
-E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
-E a[bcd]*dcdcde adcdcde (0,7)
-E (ab|a)b*c abc (0,3)(0,2)
-E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
-BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
-E ^a(bc+|b[eh])g|.h$ abh (1,3)
-E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
-E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
-E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
-E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
-BE multiple words multiple words yeah (0,14)
-E (.*)c(.*) abcde (0,5)(0,2)(3,5)
-BE abcd abcd (0,4)
-E a(bc)d abcd (0,4)(1,3)
-E a[\ 1-\ 3]?c a\ 2c (0,3)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
-E a+(b|c)*d+ aabcdd (0,6)(3,4)
-E ^.+$ vivi (0,4)
-E ^(.+)$ vivi (0,4)(0,4)
-E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
-E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
-E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
-E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
-E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
-E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
-E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
-E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
-E ((foo)|bar)!bas bar!bas (0,7)(0,3)
-E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
-E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
-E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
-E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
-E (foo|(bar))!bas foo!bas (0,7)(0,3)
-E (foo|bar)!bas bar!bas (0,7)(0,3)
-E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
-E (foo|bar)!bas foo!bas (0,7)(0,3)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
-E .*(/XXX).* /XXX (0,4)(0,4)
-E .*(\\XXX).* \XXX (0,4)(0,4)
-E \\XXX \XXX (0,4)
-E .*(/000).* /000 (0,4)(0,4)
-E .*(\\000).* \000 (0,4)(0,4)
-E \\000 \000 (0,4)
+++ /dev/null
-NOTE null subexpression matches : 2002-06-06
-
-E (a*)* a (0,1)(0,1)
-#E SAME x (0,0)(0,0)
-E SAME x (0,0)(?,?) RE2/Go
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E (a*)+ a (0,1)(0,1)
-E SAME x (0,0)(0,0)
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E (a+)* a (0,1)(0,1)
-E SAME x (0,0)
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E (a+)+ a (0,1)(0,1)
-E SAME x NOMATCH
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-
-E ([a]*)* a (0,1)(0,1)
-#E SAME x (0,0)(0,0)
-E SAME x (0,0)(?,?) RE2/Go
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E ([a]*)+ a (0,1)(0,1)
-E SAME x (0,0)(0,0)
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E ([^b]*)* a (0,1)(0,1)
-#E SAME b (0,0)(0,0)
-E SAME b (0,0)(?,?) RE2/Go
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaab (0,6)(0,6)
-E ([ab]*)* a (0,1)(0,1)
-E SAME aaaaaa (0,6)(0,6)
-E SAME ababab (0,6)(0,6)
-E SAME bababa (0,6)(0,6)
-E SAME b (0,1)(0,1)
-E SAME bbbbbb (0,6)(0,6)
-E SAME aaaabcde (0,5)(0,5)
-E ([^a]*)* b (0,1)(0,1)
-E SAME bbbbbb (0,6)(0,6)
-#E SAME aaaaaa (0,0)(0,0)
-E SAME aaaaaa (0,0)(?,?) RE2/Go
-E ([^ab]*)* ccccxx (0,6)(0,6)
-#E SAME ababab (0,0)(0,0)
-E SAME ababab (0,0)(?,?) RE2/Go
-
-E ((z)+|a)* zabcde (0,2)(1,2)
-
-#{E a+? aaaaaa (0,1) no *? +? mimimal match ops
-#E (a) aaa (0,1)(0,1)
-#E (a*?) aaa (0,0)(0,0)
-#E (a)*? aaa (0,0)
-#E (a*?)*? aaa (0,0)
-#}
-
-B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
-B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
-B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
-B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
-B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
-B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
-B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
-B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
-
-#E (a*)*(x) x (0,1)(0,0)(0,1)
-E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go
-E (a*)*(x) ax (0,2)(0,1)(1,2)
-E (a*)*(x) axa (0,2)(0,1)(1,2)
-
-E (a*)+(x) x (0,1)(0,0)(0,1)
-E (a*)+(x) ax (0,2)(0,1)(1,2)
-E (a*)+(x) axa (0,2)(0,1)(1,2)
-
-E (a*){2}(x) x (0,1)(0,0)(0,1)
-E (a*){2}(x) ax (0,2)(1,1)(1,2)
-E (a*){2}(x) axa (0,2)(1,1)(1,2)
+++ /dev/null
-NOTE implicit vs. explicit repetitions : 2009-02-02
-
-# Glenn Fowler <gsf@research.att.com>
-# conforming matches (column 4) must match one of the following BREs
-# NOMATCH
-# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
-# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
-# i.e., each 3-tuple has two identical elements and one (?,?)
-
-E ((..)|(.)) NULL NOMATCH
-E ((..)|(.))((..)|(.)) NULL NOMATCH
-E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
-
-E ((..)|(.)){1} NULL NOMATCH
-E ((..)|(.)){2} NULL NOMATCH
-E ((..)|(.)){3} NULL NOMATCH
-
-E ((..)|(.))* NULL (0,0)
-
-E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
-E ((..)|(.))((..)|(.)) a NOMATCH
-E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
-
-E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
-E ((..)|(.)){2} a NOMATCH
-E ((..)|(.)){3} a NOMATCH
-
-E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
-
-E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
-E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
-
-E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
-E ((..)|(.)){3} aa NOMATCH
-
-E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
-
-E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
-E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
-
-E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
-#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
-E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
-E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
-
-#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
-E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
-
-E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
-E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
-
-E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
-#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
-E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
-
-E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
-
-E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
-E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
-
-E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
-#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
-E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
-
-#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
-E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
-
-E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
-E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
-
-E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
-E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
-
-E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)
-
-NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
-
-# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
-# Linux/GLIBC gets the {8,} and {8,8} wrong.
-
-:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
-:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
-:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
-:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
-:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
-:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
-:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
-:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
-:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
-#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
-:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
-:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
-:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
-:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
-:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
-:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
-:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
-:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
-:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
-
-# These test a fixed bug in my regex-tdfa that did not keep the expanded
-# form properly grouped, so right association did the wrong thing with
-# these ambiguous patterns (crafted just to test my code when I became
-# suspicious of my implementation). The first subexpression should use
-# "ab" then "a" then "bcd".
-
-# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
-# results like (0,6)(4,5)(6,6).
-
-:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1)
-:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1)
-:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
-:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1)
-:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1)
-:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
-:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1)
-:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1)
-
-# The above worked on Linux/GLIBC but the following often fail.
-# They also trip up OS X / FreeBSD / NetBSD:
-
-#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
-#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
-#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
-:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
-:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// FIXME: Currently, the VM simulates an NFA. It would be nice to have another
-// VM that simulates a DFA.
-//
-// According to Russ Cox[1], a DFA performs better than an NFA, principally
-// because it reuses states previously computed by the machine *and* doesn't
-// keep track of capture groups. The drawback of a DFA (aside from its
-// complexity) is that it can't accurately return the locations of submatches.
-// The NFA *can* do that. (This is my understanding anyway.)
-//
-// Cox suggests that a DFA ought to be used to answer "does this match" and
-// "where does it match" questions. (In the latter, the starting position of
-// the match is computed by executing the regex backwards.) Cox also suggests
-// that a DFA should be run when asking "where are the submatches", which can
-// 1) quickly answer "no" is there's no match and 2) discover the substring
-// that matches, which means running the NFA on smaller input.
-//
-// Currently, the NFA simulation implemented below does some dirty tricks to
-// avoid tracking capture groups when they aren't needed (which only works
-// for 'is_match', not 'find'). This is a half-measure, but does provide some
-// perf improvement.
-//
-// AFAIK, the DFA/NFA approach is implemented in RE2/C++ but *not* in RE2/Go.
-//
-// [1] - http://swtch.com/~rsc/regex/regex3.html
-
-pub use self::MatchKind::*;
-pub use self::StepState::*;
-
-use std::cmp;
-use std::cmp::Ordering::{self, Less, Equal, Greater};
-use std::mem;
-use std::iter::repeat;
-use std::slice::SliceExt;
-use compile::{
- Program,
- Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd, EmptyWordBoundary,
- Save, Jump, Split,
-};
-use parse::{FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL, FLAG_NEGATED};
-use unicode::regex::PERLW;
-
-pub type CaptureLocs = Vec<Option<uint>>;
-
-/// Indicates the type of match to be performed by the VM.
-#[derive(Copy)]
-pub enum MatchKind {
- /// Only checks if a match exists or not. Does not return location.
- Exists,
- /// Returns the start and end indices of the entire match in the input
- /// given.
- Location,
- /// Returns the start and end indices of each submatch in the input given.
- Submatches,
-}
-
-/// Runs an NFA simulation on the compiled expression given on the search text
-/// `input`. The search begins at byte index `start` and ends at byte index
-/// `end`. (The range is specified here so that zero-width assertions will work
-/// correctly when searching for successive non-overlapping matches.)
-///
-/// The `which` parameter indicates what kind of capture information the caller
-/// wants. There are three choices: match existence only, the location of the
-/// entire match or the locations of the entire match in addition to the
-/// locations of each submatch.
-pub fn run<'r, 't>(which: MatchKind, prog: &'r Program, input: &'t str,
- start: uint, end: uint) -> CaptureLocs {
- Nfa {
- which: which,
- prog: prog,
- input: input,
- start: start,
- end: end,
- ic: 0,
- chars: CharReader::new(input),
- }.run()
-}
-
-struct Nfa<'r, 't> {
- which: MatchKind,
- prog: &'r Program,
- input: &'t str,
- start: uint,
- end: uint,
- ic: uint,
- chars: CharReader<'t>,
-}
-
-/// Indicates the next action to take after a single non-empty instruction
-/// is processed.
-#[derive(Copy)]
-pub enum StepState {
- /// This is returned if and only if a Match instruction is reached and
- /// we only care about the existence of a match. It instructs the VM to
- /// quit early.
- StepMatchEarlyReturn,
- /// Indicates that a match was found. Thus, the rest of the states in the
- /// *current* queue should be dropped (i.e., leftmost-first semantics).
- /// States in the "next" queue can still be processed.
- StepMatch,
- /// No match was found. Continue with the next state in the queue.
- StepContinue,
-}
-
-impl<'r, 't> Nfa<'r, 't> {
- fn run(&mut self) -> CaptureLocs {
- let ncaps = match self.which {
- Exists => 0,
- Location => 1,
- Submatches => self.prog.num_captures(),
- };
- let mut matched = false;
- let ninsts = self.prog.insts.len();
- let mut clist = &mut Threads::new(self.which, ninsts, ncaps);
- let mut nlist = &mut Threads::new(self.which, ninsts, ncaps);
-
- let mut groups: Vec<_> = repeat(None).take(ncaps * 2).collect();
-
- // Determine if the expression starts with a '^' so we can avoid
- // simulating .*?
- // Make sure multi-line mode isn't enabled for it, otherwise we can't
- // drop the initial .*?
- let prefix_anchor =
- match self.prog.insts[1] {
- EmptyBegin(flags) if flags & FLAG_MULTI == 0 => true,
- _ => false,
- };
-
- self.ic = self.start;
- let mut next_ic = self.chars.set(self.start);
- while self.ic <= self.end {
- if clist.size == 0 {
- // We have a match and we're done exploring alternatives.
- // Time to quit.
- if matched {
- break
- }
-
- // If there are no threads to try, then we'll have to start
- // over at the beginning of the regex.
- // BUT, if there's a literal prefix for the program, try to
- // jump ahead quickly. If it can't be found, then we can bail
- // out early.
- if self.prog.prefix.len() > 0 && clist.size == 0 {
- let needle = self.prog.prefix.as_bytes();
- let haystack = &self.input.as_bytes()[self.ic..];
- match find_prefix(needle, haystack) {
- None => break,
- Some(i) => {
- self.ic += i;
- next_ic = self.chars.set(self.ic);
- }
- }
- }
- }
-
- // This simulates a preceding '.*?' for every regex by adding
- // a state starting at the current position in the input for the
- // beginning of the program only if we don't already have a match.
- if clist.size == 0 || (!prefix_anchor && !matched) {
- self.add(clist, 0, groups.as_mut_slice())
- }
-
- // Now we try to read the next character.
- // As a result, the 'step' method will look at the previous
- // character.
- self.ic = next_ic;
- next_ic = self.chars.advance();
-
- for i in range(0, clist.size) {
- let pc = clist.pc(i);
- let step_state = self.step(groups.as_mut_slice(), nlist,
- clist.groups(i), pc);
- match step_state {
- StepMatchEarlyReturn => return vec![Some(0), Some(0)],
- StepMatch => { matched = true; break },
- StepContinue => {},
- }
- }
- mem::swap(&mut clist, &mut nlist);
- nlist.empty();
- }
- match self.which {
- Exists if matched => vec![Some(0), Some(0)],
- Exists => vec![None, None],
- Location | Submatches => groups,
- }
- }
-
- fn step(&self, groups: &mut [Option<uint>], nlist: &mut Threads,
- caps: &mut [Option<uint>], pc: uint)
- -> StepState {
- match self.prog.insts[pc] {
- Match => {
- match self.which {
- Exists => {
- return StepMatchEarlyReturn
- }
- Location => {
- groups[0] = caps[0];
- groups[1] = caps[1];
- return StepMatch
- }
- Submatches => {
- for (slot, val) in groups.iter_mut().zip(caps.iter()) {
- *slot = *val;
- }
- return StepMatch
- }
- }
- }
- OneChar(c, flags) => {
- if self.char_eq(flags & FLAG_NOCASE > 0, self.chars.prev, c) {
- self.add(nlist, pc+1, caps);
- }
- }
- CharClass(ref ranges, flags) => {
- if self.chars.prev.is_some() {
- let c = self.chars.prev.unwrap();
- let negate = flags & FLAG_NEGATED > 0;
- let casei = flags & FLAG_NOCASE > 0;
- let found = ranges.as_slice();
- let found = found.binary_search_by(|&rc| class_cmp(casei, c, rc)).is_ok();
- if found ^ negate {
- self.add(nlist, pc+1, caps);
- }
- }
- }
- Any(flags) => {
- if flags & FLAG_DOTNL > 0
- || !self.char_eq(false, self.chars.prev, '\n') {
- self.add(nlist, pc+1, caps)
- }
- }
- EmptyBegin(_) | EmptyEnd(_) | EmptyWordBoundary(_)
- | Save(_) | Jump(_) | Split(_, _) => {},
- }
- StepContinue
- }
-
- fn add(&self, nlist: &mut Threads, pc: uint, groups: &mut [Option<uint>]) {
- if nlist.contains(pc) {
- return
- }
- // We have to add states to the threads list even if their empty.
- // TL;DR - It prevents cycles.
- // If we didn't care about cycles, we'd *only* add threads that
- // correspond to non-jumping instructions (OneChar, Any, Match, etc.).
- // But, it's possible for valid regexs (like '(a*)*') to result in
- // a cycle in the instruction list. e.g., We'll keep chasing the Split
- // instructions forever.
- // So we add these instructions to our thread queue, but in the main
- // VM loop, we look for them but simply ignore them.
- // Adding them to the queue prevents them from being revisited so we
- // can avoid cycles (and the inevitable stack overflow).
- //
- // We make a minor optimization by indicating that the state is "empty"
- // so that its capture groups are not filled in.
- match self.prog.insts[pc] {
- EmptyBegin(flags) => {
- let multi = flags & FLAG_MULTI > 0;
- nlist.add(pc, groups, true);
- if self.chars.is_begin()
- || (multi && self.char_is(self.chars.prev, '\n')) {
- self.add(nlist, pc + 1, groups)
- }
- }
- EmptyEnd(flags) => {
- let multi = flags & FLAG_MULTI > 0;
- nlist.add(pc, groups, true);
- if self.chars.is_end()
- || (multi && self.char_is(self.chars.cur, '\n')) {
- self.add(nlist, pc + 1, groups)
- }
- }
- EmptyWordBoundary(flags) => {
- nlist.add(pc, groups, true);
- if self.chars.is_word_boundary() == !(flags & FLAG_NEGATED > 0) {
- self.add(nlist, pc + 1, groups)
- }
- }
- Save(slot) => {
- nlist.add(pc, groups, true);
- match self.which {
- Location if slot <= 1 => {
- let old = groups[slot];
- groups[slot] = Some(self.ic);
- self.add(nlist, pc + 1, groups);
- groups[slot] = old;
- }
- Submatches => {
- let old = groups[slot];
- groups[slot] = Some(self.ic);
- self.add(nlist, pc + 1, groups);
- groups[slot] = old;
- }
- Exists | Location => self.add(nlist, pc + 1, groups),
- }
- }
- Jump(to) => {
- nlist.add(pc, groups, true);
- self.add(nlist, to, groups)
- }
- Split(x, y) => {
- nlist.add(pc, groups, true);
- self.add(nlist, x, groups);
- self.add(nlist, y, groups);
- }
- Match | OneChar(_, _) | CharClass(_, _) | Any(_) => {
- nlist.add(pc, groups, false);
- }
- }
- }
-
- // FIXME: For case insensitive comparisons, it uses the uppercase
- // character and tests for equality. IIUC, this does not generalize to
- // all of Unicode. I believe we need to check the entire fold for each
- // character. This will be easy to add if and when it gets added to Rust's
- // standard library.
- #[inline]
- fn char_eq(&self, casei: bool, textc: Option<char>, regc: char) -> bool {
- match textc {
- None => false,
- Some(textc) => {
- regc == textc
- || (casei && regc.to_uppercase() == textc.to_uppercase())
- }
- }
- }
-
- #[inline]
- fn char_is(&self, textc: Option<char>, regc: char) -> bool {
- textc == Some(regc)
- }
-}
-
-/// CharReader is responsible for maintaining a "previous" and a "current"
-/// character. This one-character lookahead is necessary for assertions that
-/// look one character before or after the current position.
-pub struct CharReader<'t> {
- /// The previous character read. It is None only when processing the first
- /// character of the input.
- pub prev: Option<char>,
- /// The current character.
- pub cur: Option<char>,
- input: &'t str,
- next: uint,
-}
-
-impl<'t> CharReader<'t> {
- /// Returns a new CharReader that advances through the input given.
- /// Note that a CharReader has no knowledge of the range in which to search
- /// the input.
- pub fn new(input: &'t str) -> CharReader<'t> {
- CharReader {
- prev: None,
- cur: None,
- input: input,
- next: 0,
- }
- }
-
- /// Sets the previous and current character given any arbitrary byte
- /// index (at a Unicode codepoint boundary).
- #[inline]
- pub fn set(&mut self, ic: uint) -> uint {
- self.prev = None;
- self.cur = None;
- self.next = 0;
-
- if self.input.len() == 0 {
- return 1
- }
- if ic > 0 {
- let i = cmp::min(ic, self.input.len());
- let prev = self.input.char_range_at_reverse(i);
- self.prev = Some(prev.ch);
- }
- if ic < self.input.len() {
- let cur = self.input.char_range_at(ic);
- self.cur = Some(cur.ch);
- self.next = cur.next;
- self.next
- } else {
- self.input.len() + 1
- }
- }
-
- /// Does the same as `set`, except it always advances to the next
- /// character in the input (and therefore does half as many UTF8 decodings).
- #[inline]
- pub fn advance(&mut self) -> uint {
- self.prev = self.cur;
- if self.next < self.input.len() {
- let cur = self.input.char_range_at(self.next);
- self.cur = Some(cur.ch);
- self.next = cur.next;
- } else {
- self.cur = None;
- self.next = self.input.len() + 1;
- }
- self.next
- }
-
- /// Returns true if and only if this is the beginning of the input
- /// (ignoring the range of the input to search).
- #[inline]
- pub fn is_begin(&self) -> bool { self.prev.is_none() }
-
- /// Returns true if and only if this is the end of the input
- /// (ignoring the range of the input to search).
- #[inline]
- pub fn is_end(&self) -> bool { self.cur.is_none() }
-
- /// Returns true if and only if the current position is a word boundary.
- /// (Ignoring the range of the input to search.)
- pub fn is_word_boundary(&self) -> bool {
- if self.is_begin() {
- return is_word(self.cur)
- }
- if self.is_end() {
- return is_word(self.prev)
- }
- (is_word(self.cur) && !is_word(self.prev))
- || (is_word(self.prev) && !is_word(self.cur))
- }
-}
-
-struct Thread {
- pc: uint,
- groups: Vec<Option<uint>>,
-}
-
-struct Threads {
- which: MatchKind,
- queue: Vec<Thread>,
- sparse: Vec<uint>,
- size: uint,
-}
-
-impl Threads {
- // This is using a wicked neat trick to provide constant time lookup
- // for threads in the queue using a sparse set. A queue of threads is
- // allocated once with maximal size when the VM initializes and is reused
- // throughout execution. That is, there should be zero allocation during
- // the execution of a VM.
- //
- // See http://research.swtch.com/sparse for the deets.
- fn new(which: MatchKind, num_insts: uint, ncaps: uint) -> Threads {
- Threads {
- which: which,
- queue: range(0, num_insts).map(|_| {
- Thread { pc: 0, groups: repeat(None).take(ncaps * 2).collect() }
- }).collect(),
- sparse: repeat(0u).take(num_insts).collect(),
- size: 0,
- }
- }
-
- fn add(&mut self, pc: uint, groups: &[Option<uint>], empty: bool) {
- let t = &mut self.queue[self.size];
- t.pc = pc;
- match (empty, self.which) {
- (_, Exists) | (true, _) => {},
- (false, Location) => {
- t.groups[0] = groups[0];
- t.groups[1] = groups[1];
- }
- (false, Submatches) => {
- for (slot, val) in t.groups.iter_mut().zip(groups.iter()) {
- *slot = *val;
- }
- }
- }
- self.sparse[pc] = self.size;
- self.size += 1;
- }
-
- #[inline]
- fn contains(&self, pc: uint) -> bool {
- let s = self.sparse[pc];
- s < self.size && self.queue[s].pc == pc
- }
-
- #[inline]
- fn empty(&mut self) {
- self.size = 0;
- }
-
- #[inline]
- fn pc(&self, i: uint) -> uint {
- self.queue[i].pc
- }
-
- #[inline]
- fn groups<'r>(&'r mut self, i: uint) -> &'r mut [Option<uint>] {
- let q = &mut self.queue[i];
- q.groups.as_mut_slice()
- }
-}
-
-/// Returns true if the character is a word character, according to the
-/// (Unicode friendly) Perl character class '\w'.
-/// Note that this is only use for testing word boundaries. The actual '\w'
-/// is encoded as a CharClass instruction.
-pub fn is_word(c: Option<char>) -> bool {
- let c = match c {
- None => return false,
- Some(c) => c,
- };
- // Try the common ASCII case before invoking binary search.
- match c {
- '_' | '0' ... '9' | 'a' ... 'z' | 'A' ... 'Z' => true,
- _ => PERLW.binary_search_by(|&(start, end)| {
- if c >= start && c <= end {
- Equal
- } else if start > c {
- Greater
- } else {
- Less
- }
- }).is_ok()
- }
-}
-
-/// Given a character and a single character class range, return an ordering
-/// indicating whether the character is less than the start of the range,
-/// in the range (inclusive) or greater than the end of the range.
-///
-/// If `casei` is `true`, then this ordering is computed case insensitively.
-///
-/// This function is meant to be used with a binary search.
-#[inline]
-fn class_cmp(casei: bool, mut textc: char,
- (mut start, mut end): (char, char)) -> Ordering {
- if casei {
- // FIXME: This is pretty ridiculous. All of this case conversion
- // can be moved outside this function:
- // 1) textc should be uppercased outside the bsearch.
- // 2) the character class itself should be uppercased either in the
- // parser or the compiler.
- // FIXME: This is too simplistic for correct Unicode support.
- // See also: char_eq
- textc = textc.to_uppercase();
- start = start.to_uppercase();
- end = end.to_uppercase();
- }
- if textc >= start && textc <= end {
- Equal
- } else if start > textc {
- Greater
- } else {
- Less
- }
-}
-
-/// Returns the starting location of `needle` in `haystack`.
-/// If `needle` is not in `haystack`, then `None` is returned.
-///
-/// Note that this is using a naive substring algorithm.
-#[inline]
-pub fn find_prefix(needle: &[u8], haystack: &[u8]) -> Option<uint> {
- let (hlen, nlen) = (haystack.len(), needle.len());
- if nlen > hlen || nlen == 0 {
- return None
- }
- for (offset, window) in haystack.windows(nlen).enumerate() {
- if window == needle {
- return Some(offset)
- }
- }
- None
-}
extern crate getopts;
extern crate graphviz;
extern crate libc;
-extern crate regex;
extern crate rustc_llvm;
extern crate rustc_back;
extern crate serialize;
use syntax::{abi, ast, ast_map};
use syntax::ast_util::is_shift_binop;
use syntax::attr::{self, AttrMetaMethods};
-use syntax::codemap::{Span, DUMMY_SP};
+use syntax::codemap::{self, Span, DUMMY_SP};
use syntax::parse::token;
use syntax::ast::{TyIs, TyUs, TyI8, TyU8, TyI16, TyU16, TyI32, TyU32, TyI64, TyU64};
use syntax::ast_util;
"comparison is useless due to type limits");
}
- if is_shift_binop(binop) {
+ if is_shift_binop(binop.node) {
let opt_ty_bits = match ty::expr_ty(cx.tcx, &**l).sty {
ty::ty_int(t) => Some(int_ty_bits(t, cx.sess().target.int_type)),
ty::ty_uint(t) => Some(uint_ty_bits(t, cx.sess().target.uint_type)),
fn is_valid<T:cmp::PartialOrd>(binop: ast::BinOp, v: T,
min: T, max: T) -> bool {
- match binop {
+ match binop.node {
ast::BiLt => v > min && v <= max,
ast::BiLe => v >= min && v < max,
ast::BiGt => v >= min && v < max,
}
fn rev_binop(binop: ast::BinOp) -> ast::BinOp {
- match binop {
+ codemap::respan(binop.span, match binop.node {
ast::BiLt => ast::BiGt,
ast::BiLe => ast::BiGe,
ast::BiGt => ast::BiLt,
ast::BiGe => ast::BiLe,
- _ => binop
- }
+ _ => return binop
+ })
}
// for int & uint, be conservative with the warnings, so that the
}
fn is_comparison(binop: ast::BinOp) -> bool {
- match binop {
+ match binop.node {
ast::BiEq | ast::BiLt | ast::BiLe |
ast::BiNe | ast::BiGe | ast::BiGt => true,
_ => false
expr_exit
}
- ast::ExprBinary(op, ref l, ref r) if ast_util::lazy_binop(op) => {
+ ast::ExprBinary(op, ref l, ref r) if ast_util::lazy_binop(op.node) => {
//
// [pred]
// |
match (eval_const_expr_partial(tcx, &**a),
eval_const_expr_partial(tcx, &**b)) {
(Ok(const_float(a)), Ok(const_float(b))) => {
- match op {
+ match op.node {
ast::BiAdd => Ok(const_float(a + b)),
ast::BiSub => Ok(const_float(a - b)),
ast::BiMul => Ok(const_float(a * b)),
}
}
(Ok(const_int(a)), Ok(const_int(b))) => {
- match op {
+ match op.node {
ast::BiAdd => Ok(const_int(a + b)),
ast::BiSub => Ok(const_int(a - b)),
ast::BiMul => Ok(const_int(a * b)),
}
}
(Ok(const_uint(a)), Ok(const_uint(b))) => {
- match op {
+ match op.node {
ast::BiAdd => Ok(const_uint(a + b)),
ast::BiSub => Ok(const_uint(a - b)),
ast::BiMul => Ok(const_uint(a * b)),
}
// shifts can have any integral type as their rhs
(Ok(const_int(a)), Ok(const_uint(b))) => {
- match op {
+ match op.node {
ast::BiShl => Ok(const_int(a << b as uint)),
ast::BiShr => Ok(const_int(a >> b as uint)),
_ => Err("can't do this op on an int and uint".to_string())
}
}
(Ok(const_uint(a)), Ok(const_int(b))) => {
- match op {
+ match op.node {
ast::BiShl => Ok(const_uint(a << b as uint)),
ast::BiShr => Ok(const_uint(a >> b as uint)),
_ => Err("can't do this op on a uint and int".to_string())
}
}
(Ok(const_bool(a)), Ok(const_bool(b))) => {
- Ok(const_bool(match op {
+ Ok(const_bool(match op.node {
ast::BiAnd => a && b,
ast::BiOr => a || b,
ast::BiBitXor => a ^ b,
}
ast::ExprBinary(op, ref lhs, ref rhs) => {
- let pass_args = if ast_util::is_by_value_binop(op) {
+ let pass_args = if ast_util::is_by_value_binop(op.node) {
PassArgs::ByValue
} else {
PassArgs::ByRef
ir.add_live_node_for_node(expr.id, ExprNode(expr.span));
visit::walk_expr(ir, expr);
}
- ast::ExprBinary(op, _, _) if ast_util::lazy_binop(op) => {
+ ast::ExprBinary(op, _, _) if ast_util::lazy_binop(op.node) => {
ir.add_live_node_for_node(expr.id, ExprNode(expr.span));
visit::walk_expr(ir, expr);
}
self.propagate_through_exprs(&exprs[], succ)
}
- ast::ExprBinary(op, ref l, ref r) if ast_util::lazy_binop(op) => {
+ ast::ExprBinary(op, ref l, ref r) if ast_util::lazy_binop(op.node) => {
let r_succ = self.propagate_through_expr(&**r, succ);
let ln = self.live_node(expr.id, expr.span);
use util::common::can_reach;
use std::cell::RefCell;
-use syntax::codemap::Span;
+use syntax::codemap::{self, Span};
use syntax::{ast, visit};
use syntax::ast::{Block, Item, FnDecl, NodeId, Arm, Pat, Stmt, Expr, Local};
use syntax::ast_util::{stmt_id};
// scopes, meaning that temporaries cannot outlive them.
// This ensures fixed size stacks.
- ast::ExprBinary(ast::BiAnd, _, ref r) |
- ast::ExprBinary(ast::BiOr, _, ref r) => {
+ ast::ExprBinary(codemap::Spanned { node: ast::BiAnd, .. }, _, ref r) |
+ ast::ExprBinary(codemap::Spanned { node: ast::BiOr, .. }, _, ref r) => {
// For shortcircuiting operators, mark the RHS as a terminating
// scope since it only executes conditionally.
terminating(r.id);
static opcat_mod: int = 8;
fn opcat(op: ast::BinOp) -> int {
- match op {
+ match op.node {
ast::BiAdd => opcat_add,
ast::BiSub => opcat_sub,
ast::BiMul => opcat_mult,
use session::search_paths::PathKind;
use util::nodemap::NodeMap;
-use regex::Regex;
-
use syntax::ast::NodeId;
use syntax::codemap::Span;
use syntax::diagnostic::{self, Emitter};
!msg.contains("structure constructor specifies a structure of type") {
return None
}
-
- let first = Regex::new(r"[( ]expected").unwrap();
- let second = Regex::new(r" found").unwrap();
- let third = Regex::new(
- r"\((values differ|lifetime|cyclic type of infinite size)").unwrap();
+ let first = msg.match_indices("expected").filter(|s| {
+ s.0 > 0 && (msg.char_at_reverse(s.0) == ' ' ||
+ msg.char_at_reverse(s.0) == '(')
+ }).map(|(a, b)| (a - 1, b));
+ let second = msg.match_indices("found").filter(|s| {
+ msg.char_at_reverse(s.0) == ' '
+ }).map(|(a, b)| (a - 1, b));
let mut new_msg = String::new();
let mut head = 0u;
// Insert `\n` before expected and found.
- for (pos1, pos2) in first.find_iter(msg).zip(
- second.find_iter(msg)) {
+ for (pos1, pos2) in first.zip(second) {
new_msg = new_msg +
- // A `(` may be preceded by a space and it should be trimmed
- msg[head..pos1.0].trim_right() + // prefix
- "\n" + // insert before first
- &msg[pos1.0..pos1.1] + // insert what first matched
- &msg[pos1.1..pos2.0] + // between matches
- "\n " + // insert before second
- // 123
- // `expected` is 3 char longer than `found`. To align the types, `found` gets
- // 3 spaces prepended.
- &msg[pos2.0..pos2.1]; // insert what second matched
+ // A `(` may be preceded by a space and it should be trimmed
+ msg[head..pos1.0].trim_right() + // prefix
+ "\n" + // insert before first
+ &msg[pos1.0..pos1.1] + // insert what first matched
+ &msg[pos1.1..pos2.0] + // between matches
+ "\n " + // insert before second
+ // 123
+ // `expected` is 3 char longer than `found`. To align the types,
+ // `found` gets 3 spaces prepended.
+ &msg[pos2.0..pos2.1]; // insert what second matched
head = pos2.1;
}
let mut tail = &msg[head..];
+ let third = tail.find_str("(values differ")
+ .or(tail.find_str("(lifetime"))
+ .or(tail.find_str("(cyclic type of infinite size"));
// Insert `\n` before any remaining messages which match.
- for pos in third.find_iter(tail).take(1) {
- // The end of the message may just be wrapped in `()` without `expected`/`found`.
- // Push this also to a new line and add the final tail after.
+ if let Some(pos) = third {
+ // The end of the message may just be wrapped in `()` without
+ // `expected`/`found`. Push this also to a new line and add the
+ // final tail after.
new_msg = new_msg +
- // `(` is usually preceded by a space and should be trimmed.
- tail[..pos.0].trim_right() + // prefix
- "\n" + // insert before paren
- &tail[pos.0..]; // append the tail
+ // `(` is usually preceded by a space and should be trimmed.
+ tail[..pos].trim_right() + // prefix
+ "\n" + // insert before paren
+ &tail[pos..]; // append the tail
tail = "";
}
new_msg.push_str(tail);
-
- return Some(new_msg)
+ return Some(new_msg);
}
pub fn build_session(sopts: config::Options,
SawExprCall,
SawExprMethodCall,
SawExprTup,
- SawExprBinary(ast::BinOp),
+ SawExprBinary(ast::BinOp_),
SawExprUnary(ast::UnOp),
SawExprLit(ast::Lit_),
SawExprCast,
SawExprClosure,
SawExprBlock,
SawExprAssign,
- SawExprAssignOp(ast::BinOp),
+ SawExprAssignOp(ast::BinOp_),
SawExprIndex,
SawExprRange,
SawExprPath,
ExprCall(..) => SawExprCall,
ExprMethodCall(..) => SawExprMethodCall,
ExprTup(..) => SawExprTup,
- ExprBinary(op, _, _) => SawExprBinary(op),
+ ExprBinary(op, _, _) => SawExprBinary(op.node),
ExprUnary(op, _) => SawExprUnary(op),
ExprLit(ref lit) => SawExprLit(lit.node.clone()),
ExprCast(..) => SawExprCast,
ExprClosure(..) => SawExprClosure,
ExprBlock(..) => SawExprBlock,
ExprAssign(..) => SawExprAssign,
- ExprAssignOp(op, _, _) => SawExprAssignOp(op),
+ ExprAssignOp(op, _, _) => SawExprAssignOp(op.node),
ExprField(_, id) => SawExprField(content(id.node)),
ExprTupField(_, id) => SawExprTupField(id.node),
ExprIndex(..) => SawExprIndex,
lhs: ValueRef,
rhs: ValueRef,
t: Ty<'tcx>,
- op: ast::BinOp)
+ op: ast::BinOp_)
-> Result<'blk, 'tcx> {
let f = |&: a| Result::new(cx, compare_scalar_values(cx, lhs, rhs, a, op));
lhs: ValueRef,
rhs: ValueRef,
nt: scalar_type,
- op: ast::BinOp)
+ op: ast::BinOp_)
-> ValueRef {
let _icx = push_ctxt("compare_scalar_values");
fn die(cx: Block) -> ! {
not supported for floating point SIMD types")
},
ty::ty_uint(_) | ty::ty_int(_) => {
- let cmp = match op {
+ let cmp = match op.node {
ast::BiEq => llvm::IntEQ,
ast::BiNe => llvm::IntNE,
ast::BiLt => llvm::IntSLT,
G: FnOnce(ValueRef, Type) -> ValueRef,
{
// Shifts may have any size int on the rhs
- if ast_util::is_shift_binop(op) {
+ if ast_util::is_shift_binop(op.node) {
let mut rhs_llty = val_ty(rhs);
let mut lhs_llty = val_ty(lhs);
if rhs_llty.kind() == Vector { rhs_llty = rhs_llty.element_type() }
rhs: ValueRef,
rhs_t: Ty<'tcx>)
-> Block<'blk, 'tcx> {
- let (zero_text, overflow_text) = if divrem == ast::BiDiv {
+ let (zero_text, overflow_text) = if divrem.node == ast::BiDiv {
("attempted to divide by zero",
"attempted to divide with overflow")
} else {
let ty = ty::expr_ty(cx.tcx(), &**e1);
let is_float = ty::type_is_fp(ty);
let signed = ty::type_is_signed(ty);
- return match b {
+ return match b.node {
ast::BiAdd => {
if is_float { llvm::LLVMConstFAdd(te1, te2) }
else { llvm::LLVMConstAdd(te1, te2) }
let rhs_datum = unpack_datum!(bcx, trans(bcx, &**rhs));
trans_overloaded_op(bcx, expr, MethodCall::expr(expr.id), lhs,
vec![(rhs_datum, rhs.id)], Some(dest),
- !ast_util::is_by_value_binop(op)).bcx
+ !ast_util::is_by_value_binop(op.node)).bcx
}
ast::ExprUnary(op, ref subexpr) => {
// if not overloaded, would be RvalueDatumExpr
let binop_debug_loc = binop_expr.debug_loc();
let mut bcx = bcx;
- let val = match op {
+ let val = match op.node {
ast::BiAdd => {
if is_float {
FAdd(bcx, lhs, rhs, binop_debug_loc)
}
ast::BiEq | ast::BiNe | ast::BiLt | ast::BiGe | ast::BiLe | ast::BiGt => {
if ty::type_is_scalar(rhs_t) {
- unpack_result!(bcx, base::compare_scalar_types(bcx, lhs, rhs, rhs_t, op))
+ unpack_result!(bcx, base::compare_scalar_types(bcx, lhs, rhs, rhs_t, op.node))
} else if is_simd {
base::compare_simd_types(bcx, lhs, rhs, intype, ty::simd_size(tcx, lhs_t), op)
} else {
// if overloaded, would be RvalueDpsExpr
assert!(!ccx.tcx().method_map.borrow().contains_key(&MethodCall::expr(expr.id)));
- match op {
+ match op.node {
ast::BiAnd => {
trans_lazy_binop(bcx, expr, lazy_and, lhs, rhs)
}
let lhs_t = structurally_resolved_type(fcx, lhs.span,
fcx.expr_ty(&*lhs));
- if ty::type_is_integral(lhs_t) && ast_util::is_shift_binop(op) {
+ if ty::type_is_integral(lhs_t) && ast_util::is_shift_binop(op.node) {
// Shift is a special case: rhs must be uint, no matter what lhs is
check_expr(fcx, &**rhs);
let rhs_ty = fcx.expr_ty(&**rhs);
demand::suptype(fcx, expr.span, tvar, lhs_t);
check_expr_has_type(fcx, &**rhs, tvar);
- let result_t = match op {
+ let result_t = match op.node {
ast::BiEq | ast::BiNe | ast::BiLt | ast::BiLe | ast::BiGe |
ast::BiGt => {
if ty::type_is_simd(tcx, lhs_t) {
operation `{}` not \
supported for floating \
point SIMD vector `{}`",
- ast_util::binop_to_string(op),
+ ast_util::binop_to_string(op.node),
actual)
},
lhs_t,
return;
}
- if op == ast::BiOr || op == ast::BiAnd {
+ if op.node == ast::BiOr || op.node == ast::BiAnd {
// This is an error; one of the operands must have the wrong
// type
fcx.write_error(expr.id);
|actual| {
format!("binary operation `{}` cannot be applied \
to type `{}`",
- ast_util::binop_to_string(op),
+ ast_util::binop_to_string(op.node),
actual)
},
lhs_t,
operation `{}=` \
cannot be applied to \
type `{}`",
- ast_util::binop_to_string(op),
+ ast_util::binop_to_string(op.node),
actual)
},
lhs_t,
rhs: &P<ast::Expr>) -> Ty<'tcx> {
let tcx = fcx.ccx.tcx;
let lang = &tcx.lang_items;
- let (name, trait_did) = match op {
+ let (name, trait_did) = match op.node {
ast::BiAdd => ("add", lang.add_trait()),
ast::BiSub => ("sub", lang.sub_trait()),
ast::BiMul => ("mul", lang.mul_trait()),
trait_did, lhs_expr, Some(rhs), || {
fcx.type_error_message(ex.span, |actual| {
format!("binary operation `{}` cannot be applied to type `{}`",
- ast_util::binop_to_string(op),
+ ast_util::binop_to_string(op.node),
actual)
}, lhs_resolved_t, None)
- }, if ast_util::is_by_value_binop(op) { AutorefArgs::No } else { AutorefArgs::Yes })
+ }, if ast_util::is_by_value_binop(op.node) { AutorefArgs::No } else { AutorefArgs::Yes })
}
fn check_user_unop<'a, 'tcx>(fcx: &FnCtxt<'a, 'tcx>,
},
ast::ExprBinary(op, ref lhs, ref rhs) if has_method_map => {
- let implicitly_ref_args = !ast_util::is_by_value_binop(op);
+ let implicitly_ref_args = !ast_util::is_by_value_binop(op.node);
// As `expr_method_call`, but the call is via an
// overloaded op. Note that we (sadly) currently use an
+++ /dev/null
-// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
-
-//! Base64 binary-to-text encoding
-
-pub use self::FromBase64Error::*;
-pub use self::CharacterSet::*;
-
-use std::fmt;
-use std::error;
-
-/// Available encoding character sets
-#[derive(Copy)]
-pub enum CharacterSet {
- /// The standard character set (uses `+` and `/`)
- Standard,
- /// The URL safe character set (uses `-` and `_`)
- UrlSafe
-}
-
-/// Available newline types
-#[derive(Copy)]
-pub enum Newline {
- /// A linefeed (i.e. Unix-style newline)
- LF,
- /// A carriage return and a linefeed (i.e. Windows-style newline)
- CRLF
-}
-
-/// Contains configuration parameters for `to_base64`.
-#[derive(Copy)]
-pub struct Config {
- /// Character set to use
- pub char_set: CharacterSet,
- /// Newline to use
- pub newline: Newline,
- /// True to pad output with `=` characters
- pub pad: bool,
- /// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping
- pub line_length: Option<uint>
-}
-
-/// Configuration for RFC 4648 standard base64 encoding
-pub static STANDARD: Config =
- Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: None};
-
-/// Configuration for RFC 4648 base64url encoding
-pub static URL_SAFE: Config =
- Config {char_set: UrlSafe, newline: Newline::CRLF, pad: false, line_length: None};
-
-/// Configuration for RFC 2045 MIME base64 encoding
-pub static MIME: Config =
- Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: Some(76)};
-
-static STANDARD_CHARS: &'static[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
- abcdefghijklmnopqrstuvwxyz\
- 0123456789+/";
-
-static URLSAFE_CHARS: &'static[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
- abcdefghijklmnopqrstuvwxyz\
- 0123456789-_";
-
-/// A trait for converting a value to base64 encoding.
-pub trait ToBase64 {
- /// Converts the value of `self` to a base64 value following the specified
- /// format configuration, returning the owned string.
- fn to_base64(&self, config: Config) -> String;
-}
-
-impl ToBase64 for [u8] {
- /// Turn a vector of `u8` bytes into a base64 string.
- ///
- /// # Example
- ///
- /// ```rust
- /// extern crate serialize;
- /// use serialize::base64::{ToBase64, STANDARD};
- ///
- /// fn main () {
- /// let str = [52,32].to_base64(STANDARD);
- /// println!("base 64 output: {}", str);
- /// }
- /// ```
- fn to_base64(&self, config: Config) -> String {
- let bytes = match config.char_set {
- Standard => STANDARD_CHARS,
- UrlSafe => URLSAFE_CHARS
- };
-
- // In general, this Vec only needs (4/3) * self.len() memory, but
- // addition is faster than multiplication and division.
- let mut v = Vec::with_capacity(self.len() + self.len());
- let mut i = 0;
- let mut cur_length = 0;
- let len = self.len();
- let mod_len = len % 3;
- let cond_len = len - mod_len;
- let newline = match config.newline {
- Newline::LF => b"\n",
- Newline::CRLF => b"\r\n"
- };
- while i < cond_len {
- let (first, second, third) = (self[i], self[i + 1], self[i + 2]);
- if let Some(line_length) = config.line_length {
- if cur_length >= line_length {
- v.push_all(newline);
- cur_length = 0;
- }
- }
-
- let n = (first as u32) << 16 |
- (second as u32) << 8 |
- (third as u32);
-
- // This 24-bit number gets separated into four 6-bit numbers.
- v.push(bytes[((n >> 18) & 63) as uint]);
- v.push(bytes[((n >> 12) & 63) as uint]);
- v.push(bytes[((n >> 6 ) & 63) as uint]);
- v.push(bytes[(n & 63) as uint]);
-
- cur_length += 4;
- i += 3;
- }
-
- if mod_len != 0 {
- if let Some(line_length) = config.line_length {
- if cur_length >= line_length {
- v.push_all(newline);
- }
- }
- }
-
- // Heh, would be cool if we knew this was exhaustive
- // (the dream of bounded integer types)
- match mod_len {
- 0 => (),
- 1 => {
- let n = (self[i] as u32) << 16;
- v.push(bytes[((n >> 18) & 63) as uint]);
- v.push(bytes[((n >> 12) & 63) as uint]);
- if config.pad {
- v.push(b'=');
- v.push(b'=');
- }
- }
- 2 => {
- let n = (self[i] as u32) << 16 |
- (self[i + 1u] as u32) << 8;
- v.push(bytes[((n >> 18) & 63) as uint]);
- v.push(bytes[((n >> 12) & 63) as uint]);
- v.push(bytes[((n >> 6 ) & 63) as uint]);
- if config.pad {
- v.push(b'=');
- }
- }
- _ => panic!("Algebra is broken, please alert the math police")
- }
-
- unsafe { String::from_utf8_unchecked(v) }
- }
-}
-
-/// A trait for converting from base64 encoded values.
-pub trait FromBase64 {
- /// Converts the value of `self`, interpreted as base64 encoded data, into
- /// an owned vector of bytes, returning the vector.
- fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error>;
-}
-
-/// Errors that can occur when decoding a base64 encoded string
-#[derive(Copy, Show)]
-pub enum FromBase64Error {
- /// The input contained a character not part of the base64 format
- InvalidBase64Byte(u8, uint),
- /// The input had an invalid length
- InvalidBase64Length,
-}
-
-impl fmt::Display for FromBase64Error {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match *self {
- InvalidBase64Byte(ch, idx) =>
- write!(f, "Invalid character '{}' at position {}", ch, idx),
- InvalidBase64Length => write!(f, "Invalid length"),
- }
- }
-}
-
-impl error::Error for FromBase64Error {
- fn description(&self) -> &str {
- match *self {
- InvalidBase64Byte(_, _) => "invalid character",
- InvalidBase64Length => "invalid length",
- }
- }
-}
-
-impl FromBase64 for str {
- /// Convert any base64 encoded string (literal, `@`, `&`, or `~`)
- /// to the byte values it encodes.
- ///
- /// You can use the `String::from_utf8` function to turn a `Vec<u8>` into a
- /// string with characters corresponding to those values.
- ///
- /// # Example
- ///
- /// This converts a string literal to base64 and back.
- ///
- /// ```rust
- /// extern crate serialize;
- /// use serialize::base64::{ToBase64, FromBase64, STANDARD};
- ///
- /// fn main () {
- /// let hello_str = b"Hello, World".to_base64(STANDARD);
- /// println!("base64 output: {}", hello_str);
- /// let res = hello_str.as_slice().from_base64();
- /// if res.is_ok() {
- /// let opt_bytes = String::from_utf8(res.unwrap());
- /// if opt_bytes.is_ok() {
- /// println!("decoded from base64: {}", opt_bytes.unwrap());
- /// }
- /// }
- /// }
- /// ```
- #[inline]
- fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> {
- self.as_bytes().from_base64()
- }
-}
-
-impl FromBase64 for [u8] {
- fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> {
- let mut r = Vec::with_capacity(self.len());
- let mut buf: u32 = 0;
- let mut modulus = 0i;
-
- let mut it = self.iter().enumerate();
- for (idx, &byte) in it {
- let val = byte as u32;
-
- match byte {
- b'A'...b'Z' => buf |= val - 0x41,
- b'a'...b'z' => buf |= val - 0x47,
- b'0'...b'9' => buf |= val + 0x04,
- b'+' | b'-' => buf |= 0x3E,
- b'/' | b'_' => buf |= 0x3F,
- b'\r' | b'\n' => continue,
- b'=' => break,
- _ => return Err(InvalidBase64Byte(self[idx], idx)),
- }
-
- buf <<= 6;
- modulus += 1;
- if modulus == 4 {
- modulus = 0;
- r.push((buf >> 22) as u8);
- r.push((buf >> 14) as u8);
- r.push((buf >> 6 ) as u8);
- }
- }
-
- for (idx, &byte) in it {
- match byte {
- b'=' | b'\r' | b'\n' => continue,
- _ => return Err(InvalidBase64Byte(self[idx], idx)),
- }
- }
-
- match modulus {
- 2 => {
- r.push((buf >> 10) as u8);
- }
- 3 => {
- r.push((buf >> 16) as u8);
- r.push((buf >> 8 ) as u8);
- }
- 0 => (),
- _ => return Err(InvalidBase64Length),
- }
-
- Ok(r)
- }
-}
-
-#[cfg(test)]
-mod tests {
- extern crate test;
- use self::test::Bencher;
- use base64::{Config, Newline, FromBase64, ToBase64, STANDARD, URL_SAFE};
-
- #[test]
- fn test_to_base64_basic() {
- assert_eq!("".as_bytes().to_base64(STANDARD), "");
- assert_eq!("f".as_bytes().to_base64(STANDARD), "Zg==");
- assert_eq!("fo".as_bytes().to_base64(STANDARD), "Zm8=");
- assert_eq!("foo".as_bytes().to_base64(STANDARD), "Zm9v");
- assert_eq!("foob".as_bytes().to_base64(STANDARD), "Zm9vYg==");
- assert_eq!("fooba".as_bytes().to_base64(STANDARD), "Zm9vYmE=");
- assert_eq!("foobar".as_bytes().to_base64(STANDARD), "Zm9vYmFy");
- }
-
- #[test]
- fn test_to_base64_crlf_line_break() {
- assert!(![0u8; 1000].to_base64(Config {line_length: None, ..STANDARD})
- .contains("\r\n"));
- assert_eq!(b"foobar".to_base64(Config {line_length: Some(4),
- ..STANDARD}),
- "Zm9v\r\nYmFy");
- }
-
- #[test]
- fn test_to_base64_lf_line_break() {
- assert!(![0u8; 1000].to_base64(Config {line_length: None,
- newline: Newline::LF,
- ..STANDARD})
- .as_slice()
- .contains("\n"));
- assert_eq!(b"foobar".to_base64(Config {line_length: Some(4),
- newline: Newline::LF,
- ..STANDARD}),
- "Zm9v\nYmFy");
- }
-
- #[test]
- fn test_to_base64_padding() {
- assert_eq!("f".as_bytes().to_base64(Config {pad: false, ..STANDARD}), "Zg");
- assert_eq!("fo".as_bytes().to_base64(Config {pad: false, ..STANDARD}), "Zm8");
- }
-
- #[test]
- fn test_to_base64_url_safe() {
- assert_eq!([251, 255].to_base64(URL_SAFE), "-_8");
- assert_eq!([251, 255].to_base64(STANDARD), "+/8=");
- }
-
- #[test]
- fn test_from_base64_basic() {
- assert_eq!("".from_base64().unwrap(), b"");
- assert_eq!("Zg==".from_base64().unwrap(), b"f");
- assert_eq!("Zm8=".from_base64().unwrap(), b"fo");
- assert_eq!("Zm9v".from_base64().unwrap(), b"foo");
- assert_eq!("Zm9vYg==".from_base64().unwrap(), b"foob");
- assert_eq!("Zm9vYmE=".from_base64().unwrap(), b"fooba");
- assert_eq!("Zm9vYmFy".from_base64().unwrap(), b"foobar");
- }
-
- #[test]
- fn test_from_base64_bytes() {
- assert_eq!(b"Zm9vYmFy".from_base64().unwrap(), b"foobar");
- }
-
- #[test]
- fn test_from_base64_newlines() {
- assert_eq!("Zm9v\r\nYmFy".from_base64().unwrap(),
- b"foobar");
- assert_eq!("Zm9vYg==\r\n".from_base64().unwrap(),
- b"foob");
- assert_eq!("Zm9v\nYmFy".from_base64().unwrap(),
- b"foobar");
- assert_eq!("Zm9vYg==\n".from_base64().unwrap(),
- b"foob");
- }
-
- #[test]
- fn test_from_base64_urlsafe() {
- assert_eq!("-_8".from_base64().unwrap(), "+/8=".from_base64().unwrap());
- }
-
- #[test]
- fn test_from_base64_invalid_char() {
- assert!("Zm$=".from_base64().is_err());
- assert!("Zg==$".from_base64().is_err());
- }
-
- #[test]
- fn test_from_base64_invalid_padding() {
- assert!("Z===".from_base64().is_err());
- }
-
- #[test]
- fn test_base64_random() {
- use std::rand::{thread_rng, random, Rng};
-
- for _ in range(0u, 1000) {
- let times = thread_rng().gen_range(1u, 100);
- let v = thread_rng().gen_iter::<u8>().take(times).collect::<Vec<_>>();
- assert_eq!(v.to_base64(STANDARD)
- .from_base64()
- .unwrap(),
- v);
- }
- }
-
- #[bench]
- pub fn bench_to_base64(b: &mut Bencher) {
- let s = "イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム \
- ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン";
- b.iter(|| {
- s.as_bytes().to_base64(STANDARD);
- });
- b.bytes = s.len() as u64;
- }
-
- #[bench]
- pub fn bench_from_base64(b: &mut Bencher) {
- let s = "イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム \
- ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン";
- let sb = s.as_bytes().to_base64(STANDARD);
- b.iter(|| {
- sb.from_base64().unwrap();
- });
- b.bytes = sb.len() as u64;
- }
-
-}
mod serialize;
mod collection_impls;
-pub mod base64;
pub mod hex;
pub mod json;
pub use self::c_str::c_str_to_bytes;
pub use self::c_str::c_str_to_bytes_with_nul;
+pub use self::os_str::OsString;
+pub use self::os_str::OsStr;
+pub use self::os_str::AsOsStr;
+
mod c_str;
+mod os_str;
--- /dev/null
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! A type that can represent all platform-native strings, but is cheaply
+//! interconvertible with Rust strings.
+//!
+//! The need for this type arises from the fact that:
+//!
+//! * On Unix systems, strings are often arbitrary sequences of non-zero
+//! bytes, in many cases interpreted as UTF-8.
+//!
+//! * On Windows, strings are often arbitrary sequences of non-zero 16-bit
+//! values, interpreted as UTF-16 when it is valid to do so.
+//!
+//! * In Rust, strings are always valid UTF-8, but may contain zeros.
+//!
+//! The types in this module bridge this gap by simultaneously representing Rust
+//! and platform-native string values, and in particular allowing a Rust string
+//! to be converted into an "OS" string with no cost.
+//!
+//! **Note**: At the moment, these types are extremely bare-bones, usable only
+//! for conversion to/from various other string types. Eventually these types
+//! will offer a full-fledged string API.
+
+#![unstable = "recently added as part of path/io reform"]
+
+use core::prelude::*;
+
+use core::borrow::{BorrowFrom, ToOwned};
+use fmt::{self, Debug};
+use mem;
+use string::{String, CowString};
+use ops;
+use cmp;
+use hash::{Hash, Hasher, Writer};
+
+use sys::os_str::{Buf, Slice};
+use sys_common::{AsInner, IntoInner, FromInner};
+
+/// Owned, mutable OS strings.
+#[derive(Clone)]
+pub struct OsString {
+ inner: Buf
+}
+
+/// Slices into OS strings.
+pub struct OsStr {
+ inner: Slice
+}
+
+impl OsString {
+ /// Constructs an `OsString` at no cost by consuming a `String`.
+ pub fn from_string(s: String) -> OsString {
+ OsString { inner: Buf::from_string(s) }
+ }
+
+ /// Constructs an `OsString` by copying from a `&str` slice.
+ ///
+ /// Equivalent to: `OsString::from_string(String::from_str(s))`.
+ pub fn from_str(s: &str) -> OsString {
+ OsString { inner: Buf::from_str(s) }
+ }
+
+ /// Convert the `OsString` into a `String` if it contains valid Unicode data.
+ ///
+ /// On failure, ownership of the original `OsString` is returned.
+ pub fn into_string(self) -> Result<String, OsString> {
+ self.inner.into_string().map_err(|buf| OsString { inner: buf} )
+ }
+
+ /// Extend the string with the given `&OsStr` slice.
+ pub fn push_os_str(&mut self, s: &OsStr) {
+ self.inner.push_slice(&s.inner)
+ }
+}
+
+impl ops::Index<ops::FullRange> for OsString {
+ type Output = OsStr;
+
+ #[inline]
+ fn index(&self, _index: &ops::FullRange) -> &OsStr {
+ unsafe { mem::transmute(self.inner.as_slice()) }
+ }
+}
+
+impl ops::Deref for OsString {
+ type Target = OsStr;
+
+ #[inline]
+ fn deref(&self) -> &OsStr {
+ &self[]
+ }
+}
+
+impl Debug for OsString {
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+ fmt::Debug::fmt(&**self, formatter)
+ }
+}
+
+impl OsStr {
+ /// Coerce directly from a `&str` slice to a `&OsStr` slice.
+ pub fn from_str(s: &str) -> &OsStr {
+ unsafe { mem::transmute(Slice::from_str(s)) }
+ }
+
+ /// Yield a `&str` slice if the `OsStr` is valid unicode.
+ ///
+ /// This conversion may entail doing a check for UTF-8 validity.
+ pub fn to_str(&self) -> Option<&str> {
+ self.inner.to_str()
+ }
+
+ /// Convert an `OsStr` to a `CowString`.
+ ///
+ /// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
+ pub fn to_string_lossy(&self) -> CowString {
+ self.inner.to_string_lossy()
+ }
+
+ /// Copy the slice into an owned `OsString`.
+ pub fn to_os_string(&self) -> OsString {
+ OsString { inner: self.inner.to_owned() }
+ }
+
+ /// Get the underlying byte representation.
+ ///
+ /// Note: it is *crucial* that this API is private, to avoid
+ /// revealing the internal, platform-specific encodings.
+ fn bytes(&self) -> &[u8] {
+ unsafe { mem::transmute(&self.inner) }
+ }
+}
+
+impl PartialEq for OsStr {
+ fn eq(&self, other: &OsStr) -> bool {
+ self.bytes().eq(other.bytes())
+ }
+}
+
+impl PartialEq<str> for OsStr {
+ fn eq(&self, other: &str) -> bool {
+ *self == *OsStr::from_str(other)
+ }
+}
+
+impl PartialEq<OsStr> for str {
+ fn eq(&self, other: &OsStr) -> bool {
+ *other == *OsStr::from_str(self)
+ }
+}
+
+impl Eq for OsStr {}
+
+impl PartialOrd for OsStr {
+ #[inline]
+ fn partial_cmp(&self, other: &OsStr) -> Option<cmp::Ordering> {
+ self.bytes().partial_cmp(other.bytes())
+ }
+ #[inline]
+ fn lt(&self, other: &OsStr) -> bool { self.bytes().lt(other.bytes()) }
+ #[inline]
+ fn le(&self, other: &OsStr) -> bool { self.bytes().le(other.bytes()) }
+ #[inline]
+ fn gt(&self, other: &OsStr) -> bool { self.bytes().gt(other.bytes()) }
+ #[inline]
+ fn ge(&self, other: &OsStr) -> bool { self.bytes().ge(other.bytes()) }
+}
+
+impl PartialOrd<str> for OsStr {
+ #[inline]
+ fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> {
+ self.partial_cmp(OsStr::from_str(other))
+ }
+}
+
+// FIXME (#19470): cannot provide PartialOrd<OsStr> for str until we
+// have more flexible coherence rules.
+
+impl Ord for OsStr {
+ #[inline]
+ fn cmp(&self, other: &OsStr) -> cmp::Ordering { self.bytes().cmp(other.bytes()) }
+}
+
+impl<'a, S: Hasher + Writer> Hash<S> for OsStr {
+ #[inline]
+ fn hash(&self, state: &mut S) {
+ self.bytes().hash(state)
+ }
+}
+
+impl Debug for OsStr {
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+ self.inner.fmt(formatter)
+ }
+}
+
+impl BorrowFrom<OsString> for OsStr {
+ fn borrow_from(owned: &OsString) -> &OsStr { &owned[] }
+}
+
+impl ToOwned<OsString> for OsStr {
+ fn to_owned(&self) -> OsString { self.to_os_string() }
+}
+
+/// Freely convertible to an `&OsStr` slice.
+pub trait AsOsStr {
+ /// Convert to an `&OsStr` slice.
+ fn as_os_str(&self) -> &OsStr;
+}
+
+impl AsOsStr for OsStr {
+ fn as_os_str(&self) -> &OsStr {
+ self
+ }
+}
+
+impl AsOsStr for OsString {
+ fn as_os_str(&self) -> &OsStr {
+ &self[]
+ }
+}
+
+impl AsOsStr for str {
+ fn as_os_str(&self) -> &OsStr {
+ OsStr::from_str(self)
+ }
+}
+
+impl AsOsStr for String {
+ fn as_os_str(&self) -> &OsStr {
+ OsStr::from_str(&self[])
+ }
+}
+
+impl FromInner<Buf> for OsString {
+ fn from_inner(buf: Buf) -> OsString {
+ OsString { inner: buf }
+ }
+}
+
+impl IntoInner<Buf> for OsString {
+ fn into_inner(self) -> Buf {
+ self.inner
+ }
+}
+
+impl AsInner<Slice> for OsStr {
+ fn as_inner(&self) -> &Slice {
+ &self.inner
+ }
+}
/// A `send` operation can only fail if the receiving end of a channel is
/// disconnected, implying that the data could never be received. The error
/// contains the data being sent as a payload so it can be recovered.
-#[derive(PartialEq, Eq, Show)]
+#[derive(PartialEq, Eq)]
#[stable]
pub struct SendError<T>(pub T);
/// This enumeration is the list of the possible error outcomes for the
/// `SyncSender::try_send` method.
-#[derive(PartialEq, Clone, Show)]
+#[derive(PartialEq, Clone)]
#[stable]
pub enum TrySendError<T> {
/// The data could not be sent on the channel because it would require that
}
}
+#[stable]
+impl<T> fmt::Debug for SendError<T> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ "SendError(..)".fmt(f)
+ }
+}
+
#[stable]
impl<T> fmt::Display for SendError<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
}
}
+#[stable]
+impl<T> fmt::Debug for TrySendError<T> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ TrySendError::Full(..) => "Full(..)".fmt(f),
+ TrySendError::Disconnected(..) => "Disconnected(..)".fmt(f),
+ }
+ }
+}
+
#[stable]
impl<T> fmt::Display for TrySendError<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
/// is held. The precise semantics for when a lock is poisoned is documented on
/// each lock, but once a lock is poisoned then all future acquisitions will
/// return this error.
-#[derive(Show)]
#[stable]
pub struct PoisonError<T> {
guard: T,
/// An enumeration of possible errors which can occur while calling the
/// `try_lock` method.
-#[derive(Show)]
#[stable]
pub enum TryLockError<T> {
/// The lock could not be acquired because another task failed while holding
#[stable]
pub type TryLockResult<Guard> = Result<Guard, TryLockError<Guard>>;
+#[stable]
+impl<T> fmt::Debug for PoisonError<T> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ "PoisonError { inner: .. }".fmt(f)
+ }
+}
+
#[stable]
impl<T> fmt::Display for PoisonError<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
}
}
+#[stable]
+impl<T> fmt::Debug for TryLockError<T> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ TryLockError::Poisoned(..) => "Poisoned(..)".fmt(f),
+ TryLockError::WouldBlock => "WouldBlock".fmt(f)
+ }
+ }
+}
+
#[stable]
impl<T> fmt::Display for TryLockError<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
pub mod thread;
pub mod thread_info;
pub mod thread_local;
+pub mod wtf8;
// common error constructors
return (origamt - amt) as i64;
}
-// A trait for extracting representations from std::io types
-pub trait AsInner<Inner> {
+/// A trait for viewing representations from std types
+pub trait AsInner<Inner: ?Sized> {
fn as_inner(&self) -> &Inner;
}
+/// A trait for extracting representations from std types
+pub trait IntoInner<Inner> {
+ fn into_inner(self) -> Inner;
+}
+
+/// A trait for creating std types from internal representations
+pub trait FromInner<Inner> {
+ fn from_inner(inner: Inner) -> Self;
+}
+
pub trait ProcessConfig<K: BytesContainer, V: BytesContainer> {
fn program(&self) -> &CString;
fn args(&self) -> &[CString];
--- /dev/null
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Implementation of [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/).
+//!
+//! This library uses Rust’s type system to maintain
+//! [well-formedness](https://simonsapin.github.io/wtf-8/#well-formed),
+//! like the `String` and `&str` types do for UTF-8.
+//!
+//! Since [WTF-8 must not be used
+//! for interchange](https://simonsapin.github.io/wtf-8/#intended-audience),
+//! this library deliberately does not provide access to the underlying bytes
+//! of WTF-8 strings,
+//! nor can it decode WTF-8 from arbitrary bytes.
+//! WTF-8 strings can be obtained from UTF-8, UTF-16, or code points.
+
+use core::prelude::*;
+
+use core::char::{encode_utf8_raw, encode_utf16_raw};
+use core::str::{char_range_at_raw, next_code_point};
+use core::raw::Slice as RawSlice;
+
+use borrow::Cow;
+use cmp;
+use fmt;
+use hash::{Hash, Writer, Hasher};
+use iter::FromIterator;
+use mem;
+use num::Int;
+use ops;
+use slice;
+use str;
+use string::{String, CowString};
+use unicode::str::{Utf16Item, utf16_items};
+use vec::Vec;
+
+static UTF8_REPLACEMENT_CHARACTER: &'static [u8] = b"\xEF\xBF\xBD";
+
+/// A Unicode code point: from U+0000 to U+10FFFF.
+///
+/// Compare with the `char` type,
+/// which represents a Unicode scalar value:
+/// a code point that is not a surrogate (U+D800 to U+DFFF).
+#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Copy)]
+pub struct CodePoint {
+    // Invariant: value <= 0x10FFFF. Enforced by the constructors below
+    // (`from_u32`, `from_char`, and the `unsafe` `from_u32_unchecked`).
+    value: u32
+}
+
+/// Format the code point as `U+` followed by four to six hexadecimal digits.
+/// Example: `U+1F4A9`
+impl fmt::Debug for CodePoint {
+    #[inline]
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        // `{:04X}` pads to at least 4 hex digits; values above U+FFFF
+        // naturally print 5 or 6 digits.
+        write!(formatter, "U+{:04X}", self.value)
+    }
+}
+
+impl CodePoint {
+    /// Unsafely create a new `CodePoint` without checking the value.
+    ///
+    /// Only use when `value` is known to be less than or equal to 0x10FFFF.
+    #[inline]
+    pub unsafe fn from_u32_unchecked(value: u32) -> CodePoint {
+        CodePoint { value: value }
+    }
+
+    /// Create a new `CodePoint` if the value is a valid code point.
+    ///
+    /// Return `None` if `value` is above 0x10FFFF.
+    #[inline]
+    pub fn from_u32(value: u32) -> Option<CodePoint> {
+        match value {
+            0 ... 0x10FFFF => Some(CodePoint { value: value }),
+            _ => None
+        }
+    }
+
+    /// Create a new `CodePoint` from a `char`.
+    ///
+    /// Since all Unicode scalar values are code points, this always succeeds.
+    #[inline]
+    pub fn from_char(value: char) -> CodePoint {
+        CodePoint { value: value as u32 }
+    }
+
+    /// Return the numeric value of the code point.
+    #[inline]
+    pub fn to_u32(&self) -> u32 {
+        self.value
+    }
+
+    /// Optionally return a Unicode scalar value for the code point.
+    ///
+    /// Return `None` if the code point is a surrogate (from U+D800 to U+DFFF).
+    #[inline]
+    pub fn to_char(&self) -> Option<char> {
+        match self.value {
+            0xD800 ... 0xDFFF => None,
+            // Safe: a non-surrogate value <= 0x10FFFF (struct invariant)
+            // is by definition a valid Unicode scalar value, i.e. a `char`.
+            _ => Some(unsafe { mem::transmute(self.value) })
+        }
+    }
+
+    /// Return a Unicode scalar value for the code point.
+    ///
+    /// Return `'\u{FFFD}'` (the replacement character “�”)
+    /// if the code point is a surrogate (from U+D800 to U+DFFF).
+    #[inline]
+    pub fn to_char_lossy(&self) -> char {
+        self.to_char().unwrap_or('\u{FFFD}')
+    }
+}
+
+/// An owned, growable string of well-formed WTF-8 data.
+///
+/// Similar to `String`, but can additionally contain surrogate code points
+/// if they’re not in a surrogate pair.
+#[derive(Eq, PartialEq, Ord, PartialOrd, Clone)]
+pub struct Wtf8Buf {
+    // Invariant: `bytes` is well-formed WTF-8 (UTF-8-like, but lone
+    // surrogates are allowed; surrogate *pairs* are never stored as such).
+    bytes: Vec<u8>
+}
+
+// Deref to the borrowed slice type, mirroring `String: Deref<Target = str>`,
+// so all `Wtf8` methods are usable on a `Wtf8Buf` directly.
+impl ops::Deref for Wtf8Buf {
+    type Target = Wtf8;
+
+    fn deref(&self) -> &Wtf8 {
+        self.as_slice()
+    }
+}
+
+/// Format the string with double quotes,
+/// and surrogates as `\u` followed by four hexadecimal digits.
+/// Example: `"a\u{D800}"` for a string with code points [U+0061, U+D800]
+impl fmt::Debug for Wtf8Buf {
+    #[inline]
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        // Delegates to the `fmt::Debug` impl on the borrowed `Wtf8` slice.
+        self.as_slice().fmt(formatter)
+    }
+}
+
+impl Wtf8Buf {
+    /// Create a new, empty WTF-8 string.
+    #[inline]
+    pub fn new() -> Wtf8Buf {
+        Wtf8Buf { bytes: Vec::new() }
+    }
+
+    /// Create a new, empty WTF-8 string with pre-allocated capacity for `n` bytes.
+    #[inline]
+    pub fn with_capacity(n: uint) -> Wtf8Buf {
+        Wtf8Buf { bytes: Vec::with_capacity(n) }
+    }
+
+    /// Create a WTF-8 string from a UTF-8 `String`.
+    ///
+    /// This takes ownership of the `String` and does not copy.
+    ///
+    /// Since WTF-8 is a superset of UTF-8, this always succeeds.
+    #[inline]
+    pub fn from_string(string: String) -> Wtf8Buf {
+        Wtf8Buf { bytes: string.into_bytes() }
+    }
+
+    /// Create a WTF-8 string from a UTF-8 `&str` slice.
+    ///
+    /// This copies the content of the slice.
+    ///
+    /// Since WTF-8 is a superset of UTF-8, this always succeeds.
+    #[inline]
+    pub fn from_str(str: &str) -> Wtf8Buf {
+        Wtf8Buf { bytes: slice::SliceExt::to_vec(str.as_bytes()) }
+    }
+
+    /// Create a WTF-8 string from a potentially ill-formed UTF-16 slice of 16-bit code units.
+    ///
+    /// This is lossless: calling `.encode_wide()` on the resulting string
+    /// will always return the original code units.
+    pub fn from_wide(v: &[u16]) -> Wtf8Buf {
+        // Capacity lower bound: each u16 becomes at least one WTF-8 byte.
+        let mut string = Wtf8Buf::with_capacity(v.len());
+        for item in utf16_items(v) {
+            match item {
+                Utf16Item::ScalarValue(c) => string.push_char(c),
+                Utf16Item::LoneSurrogate(s) => {
+                    // Surrogates are known to be in the code point range.
+                    let code_point = unsafe { CodePoint::from_u32_unchecked(s as u32) };
+                    // Skip the WTF-8 concatenation check,
+                    // surrogate pairs are already decoded by utf16_items
+                    string.push_code_point_unchecked(code_point)
+                }
+            }
+        }
+        string
+    }
+
+    /// Copied from String::push
+    /// This does **not** include the WTF-8 concatenation check.
+    fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
+        let cur_len = self.len();
+        // This may use up to 4 bytes.
+        self.reserve(4);
+
+        unsafe {
+            // Attempt to not use an intermediate buffer by just pushing bytes
+            // directly onto this string.
+            // The slice covers the (possibly uninitialized) spare capacity
+            // just past the current length; `reserve(4)` above guarantees it.
+            let slice = RawSlice {
+                data: self.bytes.as_ptr().offset(cur_len as int),
+                len: 4,
+            };
+            // `encode_utf8_raw` returns how many of the 4 bytes were written.
+            let used = encode_utf8_raw(code_point.value, mem::transmute(slice))
+                .unwrap_or(0);
+            self.bytes.set_len(cur_len + used);
+        }
+    }
+
+    #[inline]
+    pub fn as_slice(&self) -> &Wtf8 {
+        // Safe: `Wtf8` is an unsized newtype over `[u8]` with the same
+        // well-formedness invariant as `self.bytes`.
+        unsafe { mem::transmute(self.bytes.as_slice()) }
+    }
+
+    /// Reserves capacity for at least `additional` more bytes to be inserted
+    /// in the given `Wtf8Buf`.
+    /// The collection may reserve more space to avoid frequent reallocations.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the new capacity overflows `uint`.
+    #[inline]
+    pub fn reserve(&mut self, additional: uint) {
+        self.bytes.reserve(additional)
+    }
+
+    /// Returns the number of bytes that this string buffer can hold without reallocating.
+    #[inline]
+    pub fn capacity(&self) -> uint {
+        self.bytes.capacity()
+    }
+
+    /// Append a UTF-8 slice at the end of the string.
+    #[inline]
+    pub fn push_str(&mut self, other: &str) {
+        self.bytes.push_all(other.as_bytes())
+    }
+
+    /// Append a WTF-8 slice at the end of the string.
+    ///
+    /// This replaces newly paired surrogates at the boundary
+    /// with a supplementary code point,
+    /// like concatenating ill-formed UTF-16 strings effectively would.
+    #[inline]
+    pub fn push_wtf8(&mut self, other: &Wtf8) {
+        match ((&*self).final_lead_surrogate(), other.initial_trail_surrogate()) {
+            // Replace newly paired surrogates by a supplementary code point.
+            (Some(lead), Some(trail)) => {
+                // A lone surrogate always encodes as exactly 3 WTF-8 bytes.
+                let len_without_lead_surrogate = self.len() - 3;
+                self.bytes.truncate(len_without_lead_surrogate);
+                let other_without_trail_surrogate = &other.bytes[3..];
+                // 4 bytes for the supplementary code point
+                self.bytes.reserve(4 + other_without_trail_surrogate.len());
+                self.push_char(decode_surrogate_pair(lead, trail));
+                self.bytes.push_all(other_without_trail_surrogate);
+            }
+            _ => self.bytes.push_all(&other.bytes)
+        }
+    }
+
+    /// Append a Unicode scalar value at the end of the string.
+    #[inline]
+    pub fn push_char(&mut self, c: char) {
+        // A scalar value can never pair with a preceding lead surrogate,
+        // so the concatenation check is unnecessary here.
+        self.push_code_point_unchecked(CodePoint::from_char(c))
+    }
+
+    /// Append a code point at the end of the string.
+    ///
+    /// This replaces newly paired surrogates at the boundary
+    /// with a supplementary code point,
+    /// like concatenating ill-formed UTF-16 strings effectively would.
+    #[inline]
+    pub fn push(&mut self, code_point: CodePoint) {
+        match code_point.to_u32() {
+            // Only a trail surrogate (U+DC00..U+DFFF) can complete a pair.
+            trail @ 0xDC00...0xDFFF => {
+                match (&*self).final_lead_surrogate() {
+                    Some(lead) => {
+                        // Drop the 3-byte lead surrogate and write the
+                        // combined supplementary code point instead.
+                        let len_without_lead_surrogate = self.len() - 3;
+                        self.bytes.truncate(len_without_lead_surrogate);
+                        self.push_char(decode_surrogate_pair(lead, trail as u16));
+                        return
+                    }
+                    _ => {}
+                }
+            }
+            _ => {}
+        }
+
+        // No newly paired surrogates at the boundary.
+        self.push_code_point_unchecked(code_point)
+    }
+
+    /// Shortens a string to the specified length.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `new_len` > current length,
+    /// or if `new_len` is not a code point boundary.
+    #[inline]
+    pub fn truncate(&mut self, new_len: uint) {
+        // `is_code_point_boundary` also rejects new_len > len.
+        assert!(is_code_point_boundary(self.as_slice(), new_len));
+        self.bytes.truncate(new_len)
+    }
+
+    /// Consume the WTF-8 string and try to convert it to UTF-8.
+    ///
+    /// This does not copy the data.
+    ///
+    /// If the contents are not well-formed UTF-8
+    /// (that is, if the string contains surrogates),
+    /// the original WTF-8 string is returned instead.
+    pub fn into_string(self) -> Result<String, Wtf8Buf> {
+        match self.next_surrogate(0) {
+            // Well-formed WTF-8 without surrogates is well-formed UTF-8.
+            None => Ok(unsafe { String::from_utf8_unchecked(self.bytes) }),
+            Some(_) => Err(self),
+        }
+    }
+
+    /// Consume the WTF-8 string and convert it lossily to UTF-8.
+    ///
+    /// This does not copy the data (but may overwrite parts of it in place).
+    ///
+    /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”)
+    pub fn into_string_lossy(mut self) -> String {
+        let mut pos = 0;
+        loop {
+            match self.next_surrogate(pos) {
+                Some((surrogate_pos, _)) => {
+                    pos = surrogate_pos + 3;
+                    // Both a lone surrogate and U+FFFD encode as exactly
+                    // 3 bytes, so this overwrite keeps the length unchanged.
+                    slice::bytes::copy_memory(
+                        &mut self.bytes[surrogate_pos .. pos],
+                        UTF8_REPLACEMENT_CHARACTER
+                    );
+                },
+                None => return unsafe { String::from_utf8_unchecked(self.bytes) }
+            }
+        }
+    }
+}
+
+/// Create a new WTF-8 string from an iterator of code points.
+///
+/// This replaces surrogate code point pairs with supplementary code points,
+/// like concatenating ill-formed UTF-16 strings effectively would.
+impl FromIterator<CodePoint> for Wtf8Buf {
+    fn from_iter<T: Iterator<Item=CodePoint>>(iterator: T) -> Wtf8Buf {
+        // `extend` performs the surrogate-pair joining via `push`.
+        let mut string = Wtf8Buf::new();
+        string.extend(iterator);
+        string
+    }
+}
+
+/// Append code points from an iterator to the string.
+///
+/// This replaces surrogate code point pairs with supplementary code points,
+/// like concatenating ill-formed UTF-16 strings effectively would.
+impl Extend<CodePoint> for Wtf8Buf {
+    fn extend<T: Iterator<Item=CodePoint>>(&mut self, mut iterator: T) {
+        let (low, _high) = iterator.size_hint();
+        // Lower bound of one byte per code point (ASCII only)
+        self.bytes.reserve(low);
+        for code_point in iterator {
+            // `push` handles surrogate-pair joining at the boundary.
+            self.push(code_point);
+        }
+    }
+}
+
+/// A borrowed slice of well-formed WTF-8 data.
+///
+/// Similar to `&str`, but can additionally contain surrogate code points
+/// if they’re not in a surrogate pair.
+pub struct Wtf8 {
+    // Unsized newtype over the raw bytes; `&Wtf8` is created by
+    // transmuting from `&[u8]` holding well-formed WTF-8.
+    bytes: [u8]
+}
+
+// FIXME: https://github.com/rust-lang/rust/issues/18805
+// Manual impl because `#[derive]` does not work on unsized newtypes here;
+// equality is plain byte equality.
+impl PartialEq for Wtf8 {
+    fn eq(&self, other: &Wtf8) -> bool { self.bytes.eq(&other.bytes) }
+}
+
+// FIXME: https://github.com/rust-lang/rust/issues/18805
+impl Eq for Wtf8 {}
+
+// FIXME: https://github.com/rust-lang/rust/issues/18738
+// Manual impls: ordering is lexicographic byte ordering, delegated to `[u8]`.
+impl PartialOrd for Wtf8 {
+    #[inline]
+    fn partial_cmp(&self, other: &Wtf8) -> Option<cmp::Ordering> {
+        self.bytes.partial_cmp(&other.bytes)
+    }
+    #[inline]
+    fn lt(&self, other: &Wtf8) -> bool { self.bytes.lt(&other.bytes) }
+    #[inline]
+    fn le(&self, other: &Wtf8) -> bool { self.bytes.le(&other.bytes) }
+    #[inline]
+    fn gt(&self, other: &Wtf8) -> bool { self.bytes.gt(&other.bytes) }
+    #[inline]
+    fn ge(&self, other: &Wtf8) -> bool { self.bytes.ge(&other.bytes) }
+}
+
+// FIXME: https://github.com/rust-lang/rust/issues/18738
+impl Ord for Wtf8 {
+    #[inline]
+    fn cmp(&self, other: &Wtf8) -> cmp::Ordering { self.bytes.cmp(&other.bytes) }
+}
+
+/// Format the slice with double quotes,
+/// and surrogates as `\u` followed by four hexadecimal digits.
+/// Example: `"a\u{D800}"` for a slice with code points [U+0061, U+D800]
+impl fmt::Debug for Wtf8 {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        try!(formatter.write_str("\""));
+        let mut pos = 0;
+        // Write well-formed-UTF-8 runs verbatim and each lone surrogate
+        // as an escape, alternating until the end of the slice.
+        loop {
+            match self.next_surrogate(pos) {
+                None => break,
+                Some((surrogate_pos, surrogate)) => {
+                    try!(formatter.write_str(unsafe {
+                        // the data in this slice is valid UTF-8, transmute to &str
+                        mem::transmute(&self.bytes[pos .. surrogate_pos])
+                    }));
+                    try!(write!(formatter, "\\u{{{:X}}}", surrogate));
+                    // A lone surrogate occupies exactly 3 WTF-8 bytes.
+                    pos = surrogate_pos + 3;
+                }
+            }
+        }
+        try!(formatter.write_str(unsafe {
+            // the data in this slice is valid UTF-8, transmute to &str
+            mem::transmute(&self.bytes[pos..])
+        }));
+        formatter.write_str("\"")
+    }
+}
+
+impl Wtf8 {
+    /// Create a WTF-8 slice from a UTF-8 `&str` slice.
+    ///
+    /// Since WTF-8 is a superset of UTF-8, this always succeeds.
+    #[inline]
+    pub fn from_str(value: &str) -> &Wtf8 {
+        // Safe: `Wtf8` is an unsized newtype over `[u8]`, and valid UTF-8
+        // is valid WTF-8.
+        unsafe { mem::transmute(value.as_bytes()) }
+    }
+
+    /// Return the length, in WTF-8 bytes.
+    #[inline]
+    pub fn len(&self) -> uint {
+        self.bytes.len()
+    }
+
+    /// Return the code point at `position` if it is in the ASCII range,
+    /// or `b'\xFF'` otherwise.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `position` is beyond the end of the string.
+    #[inline]
+    pub fn ascii_byte_at(&self, position: uint) -> u8 {
+        match self.bytes[position] {
+            ascii_byte @ 0x00 ... 0x7F => ascii_byte,
+            // Continuation or multi-byte lead bytes: not ASCII.
+            _ => 0xFF
+        }
+    }
+
+    /// Return the code point at `position`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `position` is not at a code point boundary,
+    /// or is beyond the end of the string.
+    #[inline]
+    pub fn code_point_at(&self, position: uint) -> CodePoint {
+        let (code_point, _) = self.code_point_range_at(position);
+        code_point
+    }
+
+    /// Return the code point at `position`
+    /// and the position of the next code point.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `position` is not at a code point boundary,
+    /// or is beyond the end of the string.
+    #[inline]
+    pub fn code_point_range_at(&self, position: uint) -> (CodePoint, uint) {
+        let (c, n) = char_range_at_raw(&self.bytes, position);
+        (CodePoint { value: c }, n)
+    }
+
+    /// Return an iterator for the string’s code points.
+    #[inline]
+    pub fn code_points(&self) -> Wtf8CodePoints {
+        Wtf8CodePoints { bytes: self.bytes.iter() }
+    }
+
+    /// Try to convert the string to UTF-8 and return a `&str` slice.
+    ///
+    /// Return `None` if the string contains surrogates.
+    ///
+    /// This does not copy the data.
+    #[inline]
+    pub fn as_str(&self) -> Option<&str> {
+        // Well-formed WTF-8 is also well-formed UTF-8
+        // if and only if it contains no surrogate.
+        match self.next_surrogate(0) {
+            None => Some(unsafe { str::from_utf8_unchecked(&self.bytes) }),
+            Some(_) => None,
+        }
+    }
+
+    /// Lossily convert the string to UTF-8.
+    /// Return a UTF-8 `&str` slice if the contents are well-formed in UTF-8.
+    ///
+    /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”).
+    ///
+    /// This only copies the data if necessary (if it contains any surrogate).
+    pub fn to_string_lossy(&self) -> CowString {
+        let surrogate_pos = match self.next_surrogate(0) {
+            // No surrogate anywhere: borrow the bytes directly as UTF-8.
+            None => return Cow::Borrowed(unsafe { str::from_utf8_unchecked(&self.bytes) }),
+            Some((pos, _)) => pos,
+        };
+        let wtf8_bytes = &self.bytes;
+        let mut utf8_bytes = Vec::with_capacity(self.len());
+        utf8_bytes.push_all(&wtf8_bytes[..surrogate_pos]);
+        utf8_bytes.push_all(UTF8_REPLACEMENT_CHARACTER);
+        // A lone surrogate occupies exactly 3 WTF-8 bytes.
+        let mut pos = surrogate_pos + 3;
+        loop {
+            match self.next_surrogate(pos) {
+                Some((surrogate_pos, _)) => {
+                    utf8_bytes.push_all(&wtf8_bytes[pos .. surrogate_pos]);
+                    utf8_bytes.push_all(UTF8_REPLACEMENT_CHARACTER);
+                    pos = surrogate_pos + 3;
+                },
+                None => {
+                    utf8_bytes.push_all(&wtf8_bytes[pos..]);
+                    return Cow::Owned(unsafe { String::from_utf8_unchecked(utf8_bytes) })
+                }
+            }
+        }
+    }
+
+    /// Convert the WTF-8 string to potentially ill-formed UTF-16
+    /// and return an iterator of 16-bit code units.
+    ///
+    /// This is lossless:
+    /// calling `Wtf8Buf::from_ill_formed_utf16` on the resulting code units
+    /// would always return the original WTF-8 string.
+    #[inline]
+    pub fn encode_wide(&self) -> EncodeWide {
+        EncodeWide { code_points: self.code_points(), extra: 0 }
+    }
+
+    /// Find the first lone surrogate at or after byte offset `pos`.
+    /// Returns its byte position and 16-bit value, or `None`.
+    #[inline]
+    fn next_surrogate(&self, mut pos: uint) -> Option<(uint, u16)> {
+        let mut iter = self.bytes[pos..].iter();
+        loop {
+            let b = match iter.next() {
+                None => return None,
+                Some(&b) => b,
+            };
+            // Dispatch on the lead byte to skip whole sequences:
+            // < 0x80: 1 byte, < 0xE0: 2 bytes, < 0xF0: 3 bytes, else 4.
+            if b < 0x80 {
+                pos += 1;
+            } else if b < 0xE0 {
+                iter.next();
+                pos += 2;
+            } else if b == 0xED {
+                // 0xED leads 3-byte sequences for U+D000..U+DFFF;
+                // a second byte >= 0xA0 means U+D800.. i.e. a surrogate.
+                match (iter.next(), iter.next()) {
+                    (Some(&b2), Some(&b3)) if b2 >= 0xA0 => {
+                        return Some((pos, decode_surrogate(b2, b3)))
+                    }
+                    _ => pos += 3
+                }
+            } else if b < 0xF0 {
+                iter.next();
+                iter.next();
+                pos += 3;
+            } else {
+                iter.next();
+                iter.next();
+                iter.next();
+                pos += 4;
+            }
+        }
+    }
+
+    /// If the string ends with a lone lead surrogate, return its value.
+    #[inline]
+    fn final_lead_surrogate(&self) -> Option<u16> {
+        let len = self.len();
+        if len < 3 {
+            return None
+        }
+        // Lead surrogates (U+D800..U+DBFF) encode as ED A0..AF xx.
+        match &self.bytes[(len - 3)..] {
+            [0xED, b2 @ 0xA0...0xAF, b3] => Some(decode_surrogate(b2, b3)),
+            _ => None
+        }
+    }
+
+    /// If the string starts with a lone trail surrogate, return its value.
+    #[inline]
+    fn initial_trail_surrogate(&self) -> Option<u16> {
+        let len = self.len();
+        if len < 3 {
+            return None
+        }
+        // Trail surrogates (U+DC00..U+DFFF) encode as ED B0..BF xx.
+        match &self.bytes[..3] {
+            [0xED, b2 @ 0xB0...0xBF, b3] => Some(decode_surrogate(b2, b3)),
+            _ => None
+        }
+    }
+}
+
+
+/// Return a slice of the given string for the byte range [`begin`..`end`).
+///
+/// # Panics
+///
+/// Panics when `begin` and `end` do not point to code point boundaries,
+/// or point beyond the end of the string.
+impl ops::Index<ops::Range<usize>> for Wtf8 {
+    type Output = Wtf8;
+
+    #[inline]
+    fn index(&self, range: &ops::Range<usize>) -> &Wtf8 {
+        // is_code_point_boundary checks that the index is in [0, .len()]
+        if range.start <= range.end &&
+           is_code_point_boundary(self, range.start) &&
+           is_code_point_boundary(self, range.end) {
+            unsafe { slice_unchecked(self, range.start, range.end) }
+        } else {
+            slice_error_fail(self, range.start, range.end)
+        }
+    }
+}
+
+/// Return a slice of the given string from byte `begin` to its end.
+///
+/// # Panics
+///
+/// Panics when `begin` is not at a code point boundary,
+/// or is beyond the end of the string.
+impl ops::Index<ops::RangeFrom<usize>> for Wtf8 {
+    type Output = Wtf8;
+
+    #[inline]
+    fn index(&self, range: &ops::RangeFrom<usize>) -> &Wtf8 {
+        // is_code_point_boundary checks that the index is in [0, .len()]
+        if is_code_point_boundary(self, range.start) {
+            unsafe { slice_unchecked(self, range.start, self.len()) }
+        } else {
+            slice_error_fail(self, range.start, self.len())
+        }
+    }
+}
+
+/// Return a slice of the given string from its beginning to byte `end`.
+///
+/// # Panics
+///
+/// Panics when `end` is not at a code point boundary,
+/// or is beyond the end of the string.
+impl ops::Index<ops::RangeTo<usize>> for Wtf8 {
+    type Output = Wtf8;
+
+    #[inline]
+    fn index(&self, range: &ops::RangeTo<usize>) -> &Wtf8 {
+        // is_code_point_boundary checks that the index is in [0, .len()]
+        if is_code_point_boundary(self, range.end) {
+            unsafe { slice_unchecked(self, 0, range.end) }
+        } else {
+            slice_error_fail(self, 0, range.end)
+        }
+    }
+}
+
+/// `&string[..]` returns the whole slice unchanged; no bounds to check.
+impl ops::Index<ops::FullRange> for Wtf8 {
+    type Output = Wtf8;
+
+    #[inline]
+    fn index(&self, _range: &ops::FullRange) -> &Wtf8 {
+        self
+    }
+}
+
+/// Reconstruct a surrogate code unit (U+D800..U+DFFF) from the second and
+/// third bytes of its 3-byte WTF-8 sequence.
+///
+/// Precondition: `second_byte` is in 0xA0..0xBF (i.e. the sequence really
+/// encodes a surrogate), so `0xD800 | …` yields the full 16-bit value.
+#[inline]
+fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 {
+    // The first byte is assumed to be 0xED
+    0xD800 | (second_byte as u16 & 0x3F) << 6 | third_byte as u16 & 0x3F
+}
+
+/// Combine a lead (U+D800..U+DBFF) and trail (U+DC00..U+DFFF) surrogate
+/// into the supplementary code point they encode (UTF-16 decoding formula).
+#[inline]
+fn decode_surrogate_pair(lead: u16, trail: u16) -> char {
+    let code_point = 0x10000 + ((((lead - 0xD800) as u32) << 10) | (trail - 0xDC00) as u32);
+    // Safe: the result is in U+10000..U+10FFFF, a valid non-surrogate
+    // scalar value, hence a valid `char`.
+    unsafe { mem::transmute(code_point) }
+}
+
+/// Copied from core::str::StrPrelude::is_char_boundary
+#[inline]
+pub fn is_code_point_boundary(slice: &Wtf8, index: uint) -> bool {
+    if index == slice.len() { return true; }
+    match slice.bytes.get(index) {
+        None => false,
+        // A boundary byte is ASCII (< 0x80) or a sequence lead (>= 0xC0);
+        // continuation bytes are 0x80..0xBF.
+        Some(&b) => b < 128u8 || b >= 192u8,
+    }
+}
+
+/// Copied from core::str::raw::slice_unchecked
+#[inline]
+pub unsafe fn slice_unchecked(s: &Wtf8, begin: uint, end: uint) -> &Wtf8 {
+    // Caller must guarantee begin <= end <= s.len() and both on boundaries.
+    mem::transmute(RawSlice {
+        data: s.bytes.as_ptr().offset(begin as int),
+        len: end - begin,
+    })
+}
+
+/// Copied from core::str::raw::slice_error_fail
+#[inline(never)]
+pub fn slice_error_fail(s: &Wtf8, begin: uint, end: uint) -> ! {
+    assert!(begin <= end);
+    panic!("index {} and/or {} in `{:?}` do not lie on character boundary",
+          begin, end, s);
+}
+
+/// Iterator for the code points of a WTF-8 string.
+///
+/// Created with the method `.code_points()`.
+#[derive(Clone)]
+pub struct Wtf8CodePoints<'a> {
+    // Raw byte iterator; `next_code_point` advances it by whole sequences.
+    bytes: slice::Iter<'a, u8>
+}
+
+impl<'a> Iterator for Wtf8CodePoints<'a> {
+    type Item = CodePoint;
+
+    #[inline]
+    fn next(&mut self) -> Option<CodePoint> {
+        next_code_point(&mut self.bytes).map(|c| CodePoint { value: c })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (uint, Option<uint>) {
+        let (len, _) = self.bytes.size_hint();
+        // Each code point is 1 to 4 bytes: at least ceil(len / 4),
+        // at most `len` code points remain.
+        (len.saturating_add(3) / 4, Some(len))
+    }
+}
+
+/// Iterator yielding the (potentially ill-formed) UTF-16 code units of a
+/// WTF-8 string. Created with `Wtf8::encode_wide`.
+#[derive(Clone)]
+pub struct EncodeWide<'a> {
+    code_points: Wtf8CodePoints<'a>,
+    // Pending trail surrogate from a supplementary code point; 0 when none
+    // is buffered (0 is never a valid second code unit of a pair).
+    extra: u16
+}
+
+// Copied from libunicode/u_str.rs
+impl<'a> Iterator for EncodeWide<'a> {
+    type Item = u16;
+
+    #[inline]
+    fn next(&mut self) -> Option<u16> {
+        if self.extra != 0 {
+            let tmp = self.extra;
+            self.extra = 0;
+            return Some(tmp);
+        }
+
+        let mut buf = [0u16; 2];
+        self.code_points.next().map(|code_point| {
+            // Encodes to 1 unit (BMP, incl. lone surrogates) or 2 units
+            // (supplementary); stash the trail unit for the next call.
+            let n = encode_utf16_raw(code_point.value, buf.as_mut_slice())
+                .unwrap_or(0);
+            if n == 2 { self.extra = buf[1]; }
+            buf[0]
+        })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (uint, Option<uint>) {
+        let (low, high) = self.code_points.size_hint();
+        // every code point gets either one u16 or two u16,
+        // so this iterator is between 1 or 2 times as
+        // long as the underlying iterator.
+        (low, high.and_then(|n| n.checked_mul(2)))
+    }
+}
+
+impl<S: Writer + Hasher> Hash<S> for CodePoint {
+    #[inline]
+    fn hash(&self, state: &mut S) {
+        self.value.hash(state)
+    }
+}
+
+impl<S: Writer + Hasher> Hash<S> for Wtf8Buf {
+    #[inline]
+    fn hash(&self, state: &mut S) {
+        state.write(self.bytes.as_slice());
+        // Trailing marker byte after the raw bytes — presumably a
+        // terminator so adjacent fields in a composite hash can't collide
+        // (same technique as the `str` Hash impl); TODO(review): confirm.
+        0xfeu8.hash(state)
+    }
+}
+
+// Must stay consistent with the `Wtf8Buf` impl above so that a buffer and
+// the slice it derefs to hash identically.
+impl<'a, S: Writer + Hasher> Hash<S> for Wtf8 {
+    #[inline]
+    fn hash(&self, state: &mut S) {
+        state.write(&self.bytes);
+        0xfeu8.hash(state)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use prelude::v1::*;
+ use borrow::Cow;
+ use super::*;
+ use mem::transmute;
+ use string::CowString;
+
+ #[test]
+ fn code_point_from_u32() {
+ assert!(CodePoint::from_u32(0).is_some());
+ assert!(CodePoint::from_u32(0xD800).is_some());
+ assert!(CodePoint::from_u32(0x10FFFF).is_some());
+ assert!(CodePoint::from_u32(0x110000).is_none());
+ }
+
+ #[test]
+ fn code_point_to_u32() {
+ fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+ assert_eq!(c(0).to_u32(), 0);
+ assert_eq!(c(0xD800).to_u32(), 0xD800);
+ assert_eq!(c(0x10FFFF).to_u32(), 0x10FFFF);
+ }
+
+ #[test]
+ fn code_point_from_char() {
+ assert_eq!(CodePoint::from_char('a').to_u32(), 0x61);
+ assert_eq!(CodePoint::from_char('💩').to_u32(), 0x1F4A9);
+ }
+
+ #[test]
+ fn code_point_to_string() {
+ assert_eq!(format!("{:?}", CodePoint::from_char('a')).as_slice(), "U+0061");
+ assert_eq!(format!("{:?}", CodePoint::from_char('💩')).as_slice(), "U+1F4A9");
+ }
+
+ #[test]
+ fn code_point_to_char() {
+ fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+ assert_eq!(c(0x61).to_char(), Some('a'));
+ assert_eq!(c(0x1F4A9).to_char(), Some('💩'));
+ assert_eq!(c(0xD800).to_char(), None);
+ }
+
+ #[test]
+ fn code_point_to_char_lossy() {
+ fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+ assert_eq!(c(0x61).to_char_lossy(), 'a');
+ assert_eq!(c(0x1F4A9).to_char_lossy(), '💩');
+ assert_eq!(c(0xD800).to_char_lossy(), '\u{FFFD}');
+ }
+
+ #[test]
+ fn wtf8buf_new() {
+ assert_eq!(Wtf8Buf::new().bytes.as_slice(), b"");
+ }
+
+ #[test]
+ fn wtf8buf_from_str() {
+ assert_eq!(Wtf8Buf::from_str("").bytes.as_slice(), b"");
+ assert_eq!(Wtf8Buf::from_str("aé 💩").bytes.as_slice(),
+ b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ }
+
+ #[test]
+ fn wtf8buf_from_string() {
+ assert_eq!(Wtf8Buf::from_string(String::from_str("")).bytes.as_slice(), b"");
+ assert_eq!(Wtf8Buf::from_string(String::from_str("aé 💩")).bytes.as_slice(),
+ b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ }
+
+ #[test]
+ fn wtf8buf_from_wide() {
+ assert_eq!(Wtf8Buf::from_wide(&[]).bytes.as_slice(), b"");
+ assert_eq!(Wtf8Buf::from_wide(
+ &[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]).bytes.as_slice(),
+ b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9");
+ }
+
+ #[test]
+ fn wtf8buf_push_str() {
+ let mut string = Wtf8Buf::new();
+ assert_eq!(string.bytes.as_slice(), b"");
+ string.push_str("aé 💩");
+ assert_eq!(string.bytes.as_slice(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ }
+
+ #[test]
+ fn wtf8buf_push_char() {
+ let mut string = Wtf8Buf::from_str("aé ");
+ assert_eq!(string.bytes.as_slice(), b"a\xC3\xA9 ");
+ string.push_char('💩');
+ assert_eq!(string.bytes.as_slice(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ }
+
+ #[test]
+ fn wtf8buf_push() {
+ let mut string = Wtf8Buf::from_str("aé ");
+ assert_eq!(string.bytes.as_slice(), b"a\xC3\xA9 ");
+ string.push(CodePoint::from_char('💩'));
+ assert_eq!(string.bytes.as_slice(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+
+ fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xD83D)); // lead
+ string.push(c(0xDCA9)); // trail
+ assert_eq!(string.bytes.as_slice(), b"\xF0\x9F\x92\xA9"); // Magic!
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xD83D)); // lead
+ string.push(c(0x20)); // not surrogate
+ string.push(c(0xDCA9)); // trail
+ assert_eq!(string.bytes.as_slice(), b"\xED\xA0\xBD \xED\xB2\xA9");
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xD800)); // lead
+ string.push(c(0xDBFF)); // lead
+ assert_eq!(string.bytes.as_slice(), b"\xED\xA0\x80\xED\xAF\xBF");
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xD800)); // lead
+ string.push(c(0xE000)); // not surrogate
+ assert_eq!(string.bytes.as_slice(), b"\xED\xA0\x80\xEE\x80\x80");
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xD7FF)); // not surrogate
+ string.push(c(0xDC00)); // trail
+ assert_eq!(string.bytes.as_slice(), b"\xED\x9F\xBF\xED\xB0\x80");
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0x61)); // not surrogate, < 3 bytes
+ string.push(c(0xDC00)); // trail
+ assert_eq!(string.bytes.as_slice(), b"\x61\xED\xB0\x80");
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xDC00)); // trail
+ assert_eq!(string.bytes.as_slice(), b"\xED\xB0\x80");
+ }
+
+ #[test]
+ fn wtf8buf_push_wtf8() {
+ let mut string = Wtf8Buf::from_str("aé");
+ assert_eq!(string.bytes.as_slice(), b"a\xC3\xA9");
+ string.push_wtf8(Wtf8::from_str(" 💩"));
+ assert_eq!(string.bytes.as_slice(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+
+ fn w(value: &[u8]) -> &Wtf8 { unsafe { transmute(value) } }
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
+ string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
+ assert_eq!(string.bytes.as_slice(), b"\xF0\x9F\x92\xA9"); // Magic!
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
+ string.push_wtf8(w(b" ")); // not surrogate
+ string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
+ assert_eq!(string.bytes.as_slice(), b"\xED\xA0\xBD \xED\xB2\xA9");
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\xA0\x80")); // lead
+ string.push_wtf8(w(b"\xED\xAF\xBF")); // lead
+ assert_eq!(string.bytes.as_slice(), b"\xED\xA0\x80\xED\xAF\xBF");
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\xA0\x80")); // lead
+ string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate
+ assert_eq!(string.bytes.as_slice(), b"\xED\xA0\x80\xEE\x80\x80");
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate
+ string.push_wtf8(w(b"\xED\xB0\x80")); // trail
+ assert_eq!(string.bytes.as_slice(), b"\xED\x9F\xBF\xED\xB0\x80");
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes
+ string.push_wtf8(w(b"\xED\xB0\x80")); // trail
+ assert_eq!(string.bytes.as_slice(), b"\x61\xED\xB0\x80");
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\xB0\x80")); // trail
+ assert_eq!(string.bytes.as_slice(), b"\xED\xB0\x80");
+ }
+
+ #[test]
+ fn wtf8buf_truncate() {
+ let mut string = Wtf8Buf::from_str("aé");
+ string.truncate(1);
+ assert_eq!(string.bytes.as_slice(), b"a");
+ }
+
+ #[test]
+ #[should_fail]
+ fn wtf8buf_truncate_fail_code_point_boundary() {
+ let mut string = Wtf8Buf::from_str("aé");
+ string.truncate(2);
+ }
+
+ #[test]
+ #[should_fail]
+ fn wtf8buf_truncate_fail_longer() {
+ let mut string = Wtf8Buf::from_str("aé");
+ string.truncate(4);
+ }
+
+ #[test]
+ fn wtf8buf_into_string() {
+ let mut string = Wtf8Buf::from_str("aé 💩");
+ assert_eq!(string.clone().into_string(), Ok(String::from_str("aé 💩")));
+ string.push(CodePoint::from_u32(0xD800).unwrap());
+ assert_eq!(string.clone().into_string(), Err(string));
+ }
+
+ #[test]
+ fn wtf8buf_into_string_lossy() {
+ let mut string = Wtf8Buf::from_str("aé 💩");
+ assert_eq!(string.clone().into_string_lossy(), String::from_str("aé 💩"));
+ string.push(CodePoint::from_u32(0xD800).unwrap());
+ assert_eq!(string.clone().into_string_lossy(), String::from_str("aé 💩�"));
+ }
+
+ #[test]
+ fn wtf8buf_from_iterator() {
+ fn f(values: &[u32]) -> Wtf8Buf {
+ values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::<Wtf8Buf>()
+ };
+ assert_eq!(f(&[0x61, 0xE9, 0x20, 0x1F4A9]).bytes.as_slice(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+
+ assert_eq!(f(&[0xD83D, 0xDCA9]).bytes.as_slice(), b"\xF0\x9F\x92\xA9"); // Magic!
+ assert_eq!(f(&[0xD83D, 0x20, 0xDCA9]).bytes.as_slice(), b"\xED\xA0\xBD \xED\xB2\xA9");
+ assert_eq!(f(&[0xD800, 0xDBFF]).bytes.as_slice(), b"\xED\xA0\x80\xED\xAF\xBF");
+ assert_eq!(f(&[0xD800, 0xE000]).bytes.as_slice(), b"\xED\xA0\x80\xEE\x80\x80");
+ assert_eq!(f(&[0xD7FF, 0xDC00]).bytes.as_slice(), b"\xED\x9F\xBF\xED\xB0\x80");
+ assert_eq!(f(&[0x61, 0xDC00]).bytes.as_slice(), b"\x61\xED\xB0\x80");
+ assert_eq!(f(&[0xDC00]).bytes.as_slice(), b"\xED\xB0\x80");
+ }
+
+ #[test]
+ fn wtf8buf_extend() {
+ fn e(initial: &[u32], extended: &[u32]) -> Wtf8Buf {
+ fn c(value: &u32) -> CodePoint { CodePoint::from_u32(*value).unwrap() }
+ let mut string = initial.iter().map(c).collect::<Wtf8Buf>();
+ string.extend(extended.iter().map(c));
+ string
+ };
+
+ assert_eq!(e(&[0x61, 0xE9], &[0x20, 0x1F4A9]).bytes.as_slice(),
+ b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+
+ assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes.as_slice(), b"\xF0\x9F\x92\xA9"); // Magic!
+ assert_eq!(e(&[0xD83D, 0x20], &[0xDCA9]).bytes.as_slice(), b"\xED\xA0\xBD \xED\xB2\xA9");
+ assert_eq!(e(&[0xD800], &[0xDBFF]).bytes.as_slice(), b"\xED\xA0\x80\xED\xAF\xBF");
+ assert_eq!(e(&[0xD800], &[0xE000]).bytes.as_slice(), b"\xED\xA0\x80\xEE\x80\x80");
+ assert_eq!(e(&[0xD7FF], &[0xDC00]).bytes.as_slice(), b"\xED\x9F\xBF\xED\xB0\x80");
+ assert_eq!(e(&[0x61], &[0xDC00]).bytes.as_slice(), b"\x61\xED\xB0\x80");
+ assert_eq!(e(&[], &[0xDC00]).bytes.as_slice(), b"\xED\xB0\x80");
+ }
+
+ #[test]
+ fn wtf8buf_show() {
+ let mut string = Wtf8Buf::from_str("aé 💩");
+ string.push(CodePoint::from_u32(0xD800).unwrap());
+ assert_eq!(format!("{:?}", string).as_slice(), r#""aé 💩\u{D800}""#);
+ }
+
+ #[test]
+ fn wtf8buf_as_slice() {
+ assert_eq!(Wtf8Buf::from_str("aé").as_slice(), Wtf8::from_str("aé"));
+ }
+
+ #[test]
+ fn wtf8_show() {
+ let mut string = Wtf8Buf::from_str("aé 💩");
+ string.push(CodePoint::from_u32(0xD800).unwrap());
+ assert_eq!(format!("{:?}", string.as_slice()).as_slice(), r#""aé 💩\u{D800}""#);
+ }
+
+ #[test]
+ fn wtf8_from_str() {
+ assert_eq!(&Wtf8::from_str("").bytes, b"");
+ assert_eq!(&Wtf8::from_str("aé 💩").bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ }
+
+ #[test]
+ fn wtf8_len() {
+ assert_eq!(Wtf8::from_str("").len(), 0);
+ assert_eq!(Wtf8::from_str("aé 💩").len(), 8);
+ }
+
+ #[test]
+ fn wtf8_slice() {
+ assert_eq!(&Wtf8::from_str("aé 💩")[1.. 4].bytes, b"\xC3\xA9 ");
+ }
+
+ #[test]
+ #[should_fail]
+ fn wtf8_slice_not_code_point_boundary() {
+ &Wtf8::from_str("aé 💩")[2.. 4];
+ }
+
+ #[test]
+ fn wtf8_slice_from() {
+ assert_eq!(&Wtf8::from_str("aé 💩")[1..].bytes, b"\xC3\xA9 \xF0\x9F\x92\xA9");
+ }
+
+ #[test]
+ #[should_fail]
+ fn wtf8_slice_from_not_code_point_boundary() {
+ &Wtf8::from_str("aé 💩")[2..];
+ }
+
+ #[test]
+ fn wtf8_slice_to() {
+ assert_eq!(&Wtf8::from_str("aé 💩")[..4].bytes, b"a\xC3\xA9 ");
+ }
+
+ #[test]
+ #[should_fail]
+ fn wtf8_slice_to_not_code_point_boundary() {
+ &Wtf8::from_str("aé 💩")[5..];
+ }
+
+ #[test]
+ fn wtf8_ascii_byte_at() {
+ let slice = Wtf8::from_str("aé 💩");
+ assert_eq!(slice.ascii_byte_at(0), b'a');
+ assert_eq!(slice.ascii_byte_at(1), b'\xFF');
+ assert_eq!(slice.ascii_byte_at(2), b'\xFF');
+ assert_eq!(slice.ascii_byte_at(3), b' ');
+ assert_eq!(slice.ascii_byte_at(4), b'\xFF');
+ }
+
+ #[test]
+ fn wtf8_code_point_at() {
+ let mut string = Wtf8Buf::from_str("aé ");
+ string.push(CodePoint::from_u32(0xD83D).unwrap());
+ string.push_char('💩');
+ assert_eq!(string.code_point_at(0), CodePoint::from_char('a'));
+ assert_eq!(string.code_point_at(1), CodePoint::from_char('é'));
+ assert_eq!(string.code_point_at(3), CodePoint::from_char(' '));
+ assert_eq!(string.code_point_at(4), CodePoint::from_u32(0xD83D).unwrap());
+ assert_eq!(string.code_point_at(7), CodePoint::from_char('💩'));
+ }
+
+ #[test]
+ fn wtf8_code_point_range_at() {
+ let mut string = Wtf8Buf::from_str("aé ");
+ string.push(CodePoint::from_u32(0xD83D).unwrap());
+ string.push_char('💩');
+ assert_eq!(string.code_point_range_at(0), (CodePoint::from_char('a'), 1));
+ assert_eq!(string.code_point_range_at(1), (CodePoint::from_char('é'), 3));
+ assert_eq!(string.code_point_range_at(3), (CodePoint::from_char(' '), 4));
+ assert_eq!(string.code_point_range_at(4), (CodePoint::from_u32(0xD83D).unwrap(), 7));
+ assert_eq!(string.code_point_range_at(7), (CodePoint::from_char('💩'), 11));
+ }
+
+ #[test]
+ fn wtf8_code_points() {
+ fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+ fn cp(string: &Wtf8Buf) -> Vec<Option<char>> {
+ string.code_points().map(|c| c.to_char()).collect::<Vec<_>>()
+ }
+ let mut string = Wtf8Buf::from_str("é ");
+ assert_eq!(cp(&string), vec![Some('é'), Some(' ')]);
+ string.push(c(0xD83D));
+ assert_eq!(cp(&string), vec![Some('é'), Some(' '), None]);
+ string.push(c(0xDCA9));
+ assert_eq!(cp(&string), vec![Some('é'), Some(' '), Some('💩')]);
+ }
+
+ #[test]
+ fn wtf8_as_str() {
+ assert_eq!(Wtf8::from_str("").as_str(), Some(""));
+ assert_eq!(Wtf8::from_str("aé 💩").as_str(), Some("aé 💩"));
+ let mut string = Wtf8Buf::new();
+ string.push(CodePoint::from_u32(0xD800).unwrap());
+ assert_eq!(string.as_str(), None);
+ }
+
+ #[test]
+ fn wtf8_to_string_lossy() {
+ assert_eq!(Wtf8::from_str("").to_string_lossy(), Cow::Borrowed(""));
+ assert_eq!(Wtf8::from_str("aé 💩").to_string_lossy(), Cow::Borrowed("aé 💩"));
+ let mut string = Wtf8Buf::from_str("aé 💩");
+ string.push(CodePoint::from_u32(0xD800).unwrap());
+ let expected: CowString = Cow::Owned(String::from_str("aé 💩�"));
+ assert_eq!(string.to_string_lossy(), expected);
+ }
+
+ #[test]
+ fn wtf8_encode_wide() {
+ let mut string = Wtf8Buf::from_str("aé ");
+ string.push(CodePoint::from_u32(0xD83D).unwrap());
+ string.push_char('💩');
+ assert_eq!(string.encode_wide().collect::<Vec<_>>(),
+ vec![0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]);
+ }
+}
#![unstable]
-use sys_common::AsInner;
+use vec::Vec;
+use sys::os_str::Buf;
+use sys_common::{AsInner, IntoInner, FromInner};
+use ffi::{OsStr, OsString};
use libc;
use io;
}
}
+// Unix-specific extensions to `OsString`.
+pub trait OsStringExt {
+ /// Create an `OsString` from a byte vector.
+ fn from_vec(vec: Vec<u8>) -> Self;
+
+ /// Yield the underlying byte vector of this `OsString`.
+ fn into_vec(self) -> Vec<u8>;
+}
+
+impl OsStringExt for OsString {
+ fn from_vec(vec: Vec<u8>) -> OsString {
+ FromInner::from_inner(Buf { inner: vec })
+ }
+
+ fn into_vec(self) -> Vec<u8> {
+ self.into_inner().inner
+ }
+}
+
+// Unix-specific extensions to `OsStr`.
+pub trait OsStrExt {
+ fn as_byte_slice(&self) -> &[u8];
+}
+
+impl OsStrExt for OsStr {
+ fn as_byte_slice(&self) -> &[u8] {
+ &self.as_inner().inner
+ }
+}
+
/// A prelude for conveniently writing platform-specific code.
///
/// Includes all extension traits, and some important type definitions.
pub mod helper_signal;
pub mod mutex;
pub mod os;
+pub mod os_str;
pub mod pipe;
pub mod process;
pub mod rwlock;
--- /dev/null
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! The underlying OsString/OsStr implementation on Unix systems: just
+//! a `Vec<u8>`/`[u8]`.
+
+use core::prelude::*;
+
+use fmt::{self, Debug};
+use vec::Vec;
+use slice::SliceExt as StdSliceExt;
+use str;
+use string::{String, CowString};
+use mem;
+
+#[derive(Clone)]
+pub struct Buf {
+ pub inner: Vec<u8>
+}
+
+pub struct Slice {
+ pub inner: [u8]
+}
+
+impl Debug for Slice {
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+ self.to_string_lossy().fmt(formatter)
+ }
+}
+
+impl Debug for Buf {
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+ self.as_slice().fmt(formatter)
+ }
+}
+
+impl Buf {
+ pub fn from_string(s: String) -> Buf {
+ Buf { inner: s.into_bytes() }
+ }
+
+ pub fn from_str(s: &str) -> Buf {
+ Buf { inner: s.as_bytes().to_vec() }
+ }
+
+ pub fn as_slice(&self) -> &Slice {
+ unsafe { mem::transmute(self.inner.as_slice()) }
+ }
+
+ pub fn into_string(self) -> Result<String, Buf> {
+ String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() } )
+ }
+
+ pub fn push_slice(&mut self, s: &Slice) {
+ self.inner.push_all(&s.inner)
+ }
+}
+
+impl Slice {
+ fn from_u8_slice(s: &[u8]) -> &Slice {
+ unsafe { mem::transmute(s) }
+ }
+
+ pub fn from_str(s: &str) -> &Slice {
+ unsafe { mem::transmute(s.as_bytes()) }
+ }
+
+ pub fn to_str(&self) -> Option<&str> {
+ str::from_utf8(&self.inner).ok()
+ }
+
+ pub fn to_string_lossy(&self) -> CowString {
+ String::from_utf8_lossy(&self.inner)
+ }
+
+ pub fn to_owned(&self) -> Buf {
+ Buf { inner: self.inner.to_vec() }
+ }
+}
#![unstable]
-use sys_common::AsInner;
+pub use sys_common::wtf8::{Wtf8Buf, EncodeWide};
+
+use sys::os_str::Buf;
+use sys_common::{AsInner, FromInner};
+use ffi::{OsStr, OsString};
use libc;
use io;
}
}
+// Windows-specific extensions to `OsString`.
+pub trait OsStringExt {
+ /// Create an `OsString` from a potentially ill-formed UTF-16 slice of 16-bit code units.
+ ///
+ /// This is lossless: calling `.encode_wide()` on the resulting string
+ /// will always return the original code units.
+ fn from_wide(wide: &[u16]) -> Self;
+}
+
+impl OsStringExt for OsString {
+ fn from_wide(wide: &[u16]) -> OsString {
+ FromInner::from_inner(Buf { inner: Wtf8Buf::from_wide(wide) })
+ }
+}
+
+// Windows-specific extensions to `OsStr`.
+pub trait OsStrExt {
+ fn encode_wide(&self) -> EncodeWide;
+}
+
+impl OsStrExt for OsStr {
+ fn encode_wide(&self) -> EncodeWide {
+ self.as_inner().inner.encode_wide()
+ }
+}
+
/// A prelude for conveniently writing platform-specific code.
///
/// Includes all extension traits, and some important type definitions.
pub mod prelude {
- pub use super::{Socket, Handle, AsRawSocket, AsRawHandle};
+ pub use super::{Socket, Handle, AsRawSocket, AsRawHandle, OsStrExt, OsStringExt};
}
pub mod helper_signal;
pub mod mutex;
pub mod os;
+pub mod os_str;
pub mod pipe;
pub mod process;
pub mod rwlock;
--- /dev/null
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! The underlying OsString/OsStr implementation on Windows is a
+//! wrapper around the "WTF-8" encoding; see the `wtf8` module for more.
+
+use fmt::{self, Debug};
+use sys_common::wtf8::{Wtf8, Wtf8Buf};
+use string::{String, CowString};
+use result::Result;
+use option::Option;
+use mem;
+
+#[derive(Clone)]
+pub struct Buf {
+ pub inner: Wtf8Buf
+}
+
+impl Debug for Buf {
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+ self.as_slice().fmt(formatter)
+ }
+}
+
+pub struct Slice {
+ pub inner: Wtf8
+}
+
+impl Debug for Slice {
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+ self.inner.fmt(formatter)
+ }
+}
+
+impl Buf {
+ pub fn from_string(s: String) -> Buf {
+ Buf { inner: Wtf8Buf::from_string(s) }
+ }
+
+ pub fn from_str(s: &str) -> Buf {
+ Buf { inner: Wtf8Buf::from_str(s) }
+ }
+
+ pub fn as_slice(&self) -> &Slice {
+ unsafe { mem::transmute(self.inner.as_slice()) }
+ }
+
+ pub fn into_string(self) -> Result<String, Buf> {
+ self.inner.into_string().map_err(|buf| Buf { inner: buf })
+ }
+
+ pub fn push_slice(&mut self, s: &Slice) {
+ self.inner.push_wtf8(&s.inner)
+ }
+}
+
+impl Slice {
+ pub fn from_str(s: &str) -> &Slice {
+ unsafe { mem::transmute(Wtf8::from_str(s)) }
+ }
+
+ pub fn to_str(&self) -> Option<&str> {
+ self.inner.as_str()
+ }
+
+ pub fn to_string_lossy(&self) -> CowString {
+ self.inner.to_string_lossy()
+ }
+
+ pub fn to_owned(&self) -> Buf {
+ let mut buf = Wtf8Buf::with_capacity(self.inner.len());
+ buf.push_wtf8(&self.inner);
+ Buf { inner: buf }
+ }
+}
pub use self::AsmDialect::*;
pub use self::AttrStyle::*;
pub use self::BindingMode::*;
-pub use self::BinOp::*;
+pub use self::BinOp_::*;
pub use self::BlockCheckMode::*;
pub use self::CaptureClause::*;
pub use self::Decl_::*;
}
#[derive(Clone, PartialEq, Eq, RustcEncodable, RustcDecodable, Hash, Show, Copy)]
-pub enum BinOp {
+pub enum BinOp_ {
BiAdd,
BiSub,
BiMul,
BiGt,
}
+pub type BinOp = Spanned<BinOp_>;
+
#[derive(Clone, PartialEq, Eq, RustcEncodable, RustcDecodable, Hash, Show, Copy)]
pub enum UnOp {
UnUniq,
}
}
-pub fn binop_to_string(op: BinOp) -> &'static str {
+pub fn binop_to_string(op: BinOp_) -> &'static str {
match op {
BiAdd => "+",
BiSub => "-",
}
}
-pub fn lazy_binop(b: BinOp) -> bool {
+pub fn lazy_binop(b: BinOp_) -> bool {
match b {
BiAnd => true,
BiOr => true,
}
}
-pub fn is_shift_binop(b: BinOp) -> bool {
+pub fn is_shift_binop(b: BinOp_) -> bool {
match b {
BiShl => true,
BiShr => true,
}
}
-pub fn is_comparison_binop(b: BinOp) -> bool {
+pub fn is_comparison_binop(b: BinOp_) -> bool {
match b {
BiEq | BiLt | BiLe | BiNe | BiGt | BiGe => true,
_ => false
}
/// Returns `true` if the binary operator takes its arguments by value
-pub fn is_by_value_binop(b: BinOp) -> bool {
+pub fn is_by_value_binop(b: BinOp_) -> bool {
match b {
BiAdd | BiSub | BiMul | BiDiv | BiRem | BiBitXor | BiBitAnd | BiBitOr | BiShl | BiShr => {
true
}
/// Maps a binary operator to its precedence
-pub fn operator_prec(op: ast::BinOp) -> usize {
+pub fn operator_prec(op: ast::BinOp_) -> usize {
match op {
// 'as' sits here with 12
BiMul | BiDiv | BiRem => 11us,
fn expr_ident(&self, span: Span, id: ast::Ident) -> P<ast::Expr>;
fn expr_self(&self, span: Span) -> P<ast::Expr>;
- fn expr_binary(&self, sp: Span, op: ast::BinOp,
+ fn expr_binary(&self, sp: Span, op: ast::BinOp_,
lhs: P<ast::Expr>, rhs: P<ast::Expr>) -> P<ast::Expr>;
fn expr_deref(&self, sp: Span, e: P<ast::Expr>) -> P<ast::Expr>;
fn expr_unary(&self, sp: Span, op: ast::UnOp, e: P<ast::Expr>) -> P<ast::Expr>;
self.expr_ident(span, special_idents::self_)
}
- fn expr_binary(&self, sp: Span, op: ast::BinOp,
+ fn expr_binary(&self, sp: Span, op: ast::BinOp_,
lhs: P<ast::Expr>, rhs: P<ast::Expr>) -> P<ast::Expr> {
- self.expr(sp, ast::ExprBinary(op, lhs, rhs))
+ self.expr(sp, ast::ExprBinary(Spanned { node: op, span: sp }, lhs, rhs))
}
fn expr_deref(&self, sp: Span, e: P<ast::Expr>) -> P<ast::Expr> {
/// Use a given binop to combine the result of calling the derived method
/// on all the fields.
#[inline]
-pub fn cs_binop(binop: ast::BinOp, base: P<Expr>,
+pub fn cs_binop(binop: ast::BinOp_, base: P<Expr>,
enum_nonmatch_f: EnumNonMatchCollapsedFunc,
cx: &mut ExtCtxt, trait_span: Span,
substructure: &Substructure) -> P<Expr> {
self.expected_tokens.push(TokenType::Operator);
+ let cur_op_span = self.span;
let cur_opt = self.token.to_binop();
match cur_opt {
Some(cur_op) => {
let rhs = self.parse_more_binops(expr, cur_prec + 1);
let lhs_span = lhs.span;
let rhs_span = rhs.span;
- let binary = self.mk_binary(cur_op, lhs, rhs);
+ let binary = self.mk_binary(codemap::respan(cur_op_span, cur_op), lhs, rhs);
let bin = self.mk_expr(lhs_span.lo, rhs_span.hi, binary);
self.parse_more_binops(bin, min_prec)
} else {
/// Produce an error if comparison operators are chained (RFC #558).
/// We only need to check lhs, not rhs, because all comparison ops
/// have same precedence and are left-associative
- fn check_no_chained_comparison(&mut self, lhs: &Expr, outer_op: ast::BinOp) {
+ fn check_no_chained_comparison(&mut self, lhs: &Expr, outer_op: ast::BinOp_) {
debug_assert!(ast_util::is_comparison_binop(outer_op));
match lhs.node {
- ExprBinary(op, _, _) if ast_util::is_comparison_binop(op) => {
- let op_span = self.span;
+ ExprBinary(op, _, _) if ast_util::is_comparison_binop(op.node) => {
+ // respan to include both operators
+ let op_span = mk_sp(op.span.lo, self.span.hi);
self.span_err(op_span,
- "Chained comparison operators require parentheses");
- if op == BiLt && outer_op == BiGt {
+ "chained comparison operators require parentheses");
+ if op.node == BiLt && outer_op == BiGt {
self.span_help(op_span,
- "use ::< instead of < if you meant to specify type arguments");
+ "use `::<...>` instead of `<...>` if you meant to specify type arguments");
}
}
_ => {}
pub fn parse_assign_expr_with(&mut self, lhs: P<Expr>) -> P<Expr> {
let restrictions = self.restrictions & RESTRICTION_NO_STRUCT_LITERAL;
+ let op_span = self.span;
match self.token {
token::Eq => {
self.bump();
};
let rhs_span = rhs.span;
let span = lhs.span;
- let assign_op = self.mk_assign_op(aop, lhs, rhs);
+ let assign_op = self.mk_assign_op(codemap::respan(op_span, aop), lhs, rhs);
self.mk_expr(span.lo, rhs_span.hi, assign_op)
}
// A range expression, either `expr..expr` or `expr..`.
}
/// Maps a token to its corresponding binary operator.
- pub fn to_binop(&self) -> Option<ast::BinOp> {
+ pub fn to_binop(&self) -> Option<ast::BinOp_> {
match *self {
BinOp(Star) => Some(ast::BiMul),
BinOp(Slash) => Some(ast::BiDiv),
pub fn print_tts(&mut self, tts: &[ast::TokenTree]) -> IoResult<()> {
try!(self.ibox(0));
+ let mut suppress_space = false;
for (i, tt) in tts.iter().enumerate() {
- if i != 0 {
+ if i != 0 && !suppress_space {
try!(space(&mut self.s));
}
try!(self.print_tt(tt));
+ // There should be no space between the module name and the following `::` in paths,
+ // otherwise imported macros get re-parsed from crate metadata incorrectly (#20701)
+ suppress_space = match tt {
+ &ast::TtToken(_, token::Ident(_, token::ModName)) |
+ &ast::TtToken(_, token::MatchNt(_, _, _, token::ModName)) |
+ &ast::TtToken(_, token::SubstNt(_, token::ModName)) => true,
+ _ => false
+ }
}
self.end()
}
rhs: &ast::Expr) -> IoResult<()> {
try!(self.print_expr(lhs));
try!(space(&mut self.s));
- try!(self.word_space(ast_util::binop_to_string(op)));
+ try!(self.word_space(ast_util::binop_to_string(op.node)));
self.print_expr(rhs)
}
ast::ExprAssignOp(op, ref lhs, ref rhs) => {
try!(self.print_expr(&**lhs));
try!(space(&mut self.s));
- try!(word(&mut self.s, ast_util::binop_to_string(op)));
+ try!(word(&mut self.s, ast_util::binop_to_string(op.node)));
try!(self.word_space("="));
try!(self.print_expr(&**rhs));
}
#![allow(unstable)]
extern crate getopts;
-extern crate regex;
extern crate serialize;
extern crate "serialize" as rustc_serialize;
extern crate term;
use stats::Stats;
use getopts::{OptGroup, optflag, optopt};
-use regex::Regex;
use serialize::Encodable;
use term::Terminal;
use term::color::{Color, RED, YELLOW, GREEN, CYAN};
}
pub struct TestOpts {
- pub filter: Option<Regex>,
+ pub filter: Option<String>,
pub run_ignored: bool,
pub run_tests: bool,
pub run_benchmarks: bool,
if matches.opt_present("h") { usage(args[0].as_slice()); return None; }
let filter = if matches.free.len() > 0 {
- let s = matches.free[0].as_slice();
- match Regex::new(s) {
- Ok(re) => Some(re),
- Err(e) => return Some(Err(format!("could not parse /{}/: {:?}", s, e)))
- }
+ Some(matches.free[0].clone())
} else {
None
};
// Remove tests that don't match the test filter
filtered = match opts.filter {
None => filtered,
- Some(ref re) => {
- filtered.into_iter()
- .filter(|test| re.is_match(test.desc.name.as_slice())).collect()
+ Some(ref filter) => {
+ filtered.into_iter().filter(|test| {
+ test.desc.name.as_slice().contains(&filter[])
+ }).collect()
}
};
assert!(res == TrFailed);
}
- #[test]
- fn first_free_arg_should_be_a_filter() {
- let args = vec!("progname".to_string(), "some_regex_filter".to_string());
- let opts = match parse_opts(args.as_slice()) {
- Some(Ok(o)) => o,
- _ => panic!("Malformed arg in first_free_arg_should_be_a_filter")
- };
- assert!(opts.filter.expect("should've found filter").is_match("some_regex_filter"))
- }
-
#[test]
fn parse_ignored_flag() {
let args = vec!("progname".to_string(),
}
}
- #[test]
- pub fn filter_tests_regex() {
- let mut opts = TestOpts::new();
- opts.filter = Some(::regex::Regex::new("a.*b.+c").unwrap());
-
- let mut names = ["yes::abXc", "yes::aXXXbXXXXc",
- "no::XYZ", "no::abc"];
- names.sort();
-
- fn test_fn() {}
- let tests = names.iter().map(|name| {
- TestDescAndFn {
- desc: TestDesc {
- name: DynTestName(name.to_string()),
- ignore: false,
- should_fail: ShouldFail::No,
- },
- testfn: DynTestFn(Thunk::new(test_fn))
- }
- }).collect();
- let filtered = filter_tests(&opts, tests);
-
- let expected: Vec<&str> =
- names.iter().map(|&s| s).filter(|name| name.starts_with("yes")).collect();
-
- assert_eq!(filtered.len(), expected.len());
- for (test, expected_name) in filtered.iter().zip(expected.iter()) {
- assert_eq!(test.desc.name.as_slice(), *expected_name);
- }
- }
-
#[test]
pub fn test_metricmap_compare() {
let mut m1 = MetricMap::new();
use std::io::BufferedReader;
use std::iter;
use std::iter::AdditiveIterator;
-use regex::Regex;
pub struct BookItem {
pub title: String,
}
}
- let regex = r"(?P<indent>[\t ]*)\*[:space:]*\[(?P<title>.*)\]\((?P<path>.*)\)";
- let item_re = Regex::new(regex).unwrap();
let mut top_items = vec!();
let mut stack = vec!();
let mut errors = vec!();
}
};
- item_re.captures(&line[]).map(|cap| {
- let given_path = cap.name("path");
- let title = cap.name("title").unwrap().to_string();
-
- let path_from_root = match src.join(given_path.unwrap()).path_relative_from(src) {
- Some(p) => p,
- None => {
- errors.push(format!("paths in SUMMARY.md must be relative, \
- but path '{}' for section '{}' is not.",
- given_path.unwrap(), title));
- Path::new("")
- }
- };
- let path_to_root = Path::new(iter::repeat("../")
- .take(path_from_root.components().count() - 1)
- .collect::<String>());
- let item = BookItem {
- title: title,
- path: path_from_root,
- path_to_root: path_to_root,
- children: vec!(),
- };
- let level = cap.name("indent").unwrap().chars().map(|c| {
- match c {
- ' ' => 1us,
- '\t' => 4,
- _ => unreachable!()
- }
- }).sum() / 4 + 1;
-
- if level > stack.len() + 1 {
- errors.push(format!("section '{}' is indented too deeply; \
- found {}, expected {} or less",
- item.title, level, stack.len() + 1));
- } else if level <= stack.len() {
- collapse(&mut stack, &mut top_items, level);
+ let star_idx = match line.find_str("*") { Some(i) => i, None => continue };
+
+ let start_bracket = star_idx + line[star_idx..].find_str("[").unwrap();
+ let end_bracket = start_bracket + line[start_bracket..].find_str("](").unwrap();
+ let start_paren = end_bracket + 1;
+ let end_paren = start_paren + line[start_paren..].find_str(")").unwrap();
+
+ let given_path = &line[start_paren + 1 .. end_paren];
+ let title = line[start_bracket + 1..end_bracket].to_string();
+ let indent = &line[..star_idx];
+
+ let path_from_root = match src.join(given_path).path_relative_from(src) {
+ Some(p) => p,
+ None => {
+ errors.push(format!("paths in SUMMARY.md must be relative, \
+ but path '{}' for section '{}' is not.",
+ given_path, title));
+ Path::new("")
}
- stack.push(item)
- });
+ };
+ let path_to_root = Path::new(iter::repeat("../")
+ .take(path_from_root.components().count() - 1)
+ .collect::<String>());
+ let item = BookItem {
+ title: title,
+ path: path_from_root,
+ path_to_root: path_to_root,
+ children: vec!(),
+ };
+ let level = indent.chars().map(|c| {
+ match c {
+ ' ' => 1us,
+ '\t' => 4,
+ _ => unreachable!()
+ }
+ }).sum() / 4 + 1;
+
+ if level > stack.len() + 1 {
+ errors.push(format!("section '{}' is indented too deeply; \
+ found {}, expected {} or less",
+ item.title, level, stack.len() + 1));
+ } else if level <= stack.len() {
+ collapse(&mut stack, &mut top_items, level);
+ }
+ stack.push(item)
}
if errors.is_empty() {
use css;
use javascript;
-use regex::Regex;
-
use rustdoc;
struct Build;
let out_path = tgt.join(item.path.dirname());
- let regex = r"\[(?P<title>[^]]*)\]\((?P<url_stem>[^)]*)\.(?P<ext>md|markdown)\)";
- let md_urls = Regex::new(regex).unwrap();
-
let src;
if os::args().len() < 3 {
src = os::getcwd().unwrap().clone();
let markdown_data = try!(File::open(&src.join(&item.path)).read_to_string());
let preprocessed_path = tmp.path().join(item.path.filename().unwrap());
{
- let urls = md_urls.replace_all(&markdown_data[], "[$title]($url_stem.html)");
+ let urls = markdown_data.replace(".md)", ".html)");
try!(File::create(&preprocessed_path)
.write_str(&urls[]));
}
#![feature(slicing_syntax, box_syntax)]
#![allow(unstable)]
-extern crate regex;
-
extern crate rustdoc;
use std::os;
--- /dev/null
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![crate_type = "lib"]
+
+#[macro_export]
+macro_rules! declare {
+ () => (
+ pub fn aaa() {}
+
+ pub mod bbb {
+ use super::aaa;
+
+ pub fn ccc() {
+ aaa();
+ }
+ }
+ )
+}
+++ /dev/null
-// The Computer Language Benchmarks Game
-// http://benchmarksgame.alioth.debian.org/
-//
-// contributed by the Rust Project Developers
-
-// Copyright (c) 2014 The Rust Project Developers
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//
-// - Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in
-// the documentation and/or other materials provided with the
-// distribution.
-//
-// - Neither the name of "The Computer Language Benchmarks Game" nor
-// the name of "The Computer Language Shootout Benchmarks" nor the
-// names of its contributors may be used to endorse or promote
-// products derived from this software without specific prior
-// written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-// OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ignore-stage1
-// ignore-cross-compile #12102
-
-#![feature(box_syntax)]
-
-extern crate regex;
-
-use std::io;
-use regex::{NoExpand, Regex};
-use std::sync::{Arc, Future};
-
-macro_rules! regex {
- ($e:expr) => (Regex::new($e).unwrap())
-}
-
-fn count_matches(seq: &str, variant: &Regex) -> int {
- let mut n = 0;
- for _ in variant.find_iter(seq) {
- n += 1;
- }
- n
-}
-
-fn main() {
- let mut rdr = if std::os::getenv("RUST_BENCH").is_some() {
- let fd = io::File::open(&Path::new("shootout-k-nucleotide.data"));
- box io::BufferedReader::new(fd) as Box<io::Reader>
- } else {
- box io::stdin() as Box<io::Reader>
- };
- let mut seq = rdr.read_to_string().unwrap();
- let ilen = seq.len();
-
- seq = regex!(">[^\n]*\n|\n").replace_all(seq.as_slice(), NoExpand(""));
- let seq_arc = Arc::new(seq.clone()); // copy before it moves
- let clen = seq.len();
-
- let mut seqlen = Future::spawn(move|| {
- let substs = vec![
- (regex!("B"), "(c|g|t)"),
- (regex!("D"), "(a|g|t)"),
- (regex!("H"), "(a|c|t)"),
- (regex!("K"), "(g|t)"),
- (regex!("M"), "(a|c)"),
- (regex!("N"), "(a|c|g|t)"),
- (regex!("R"), "(a|g)"),
- (regex!("S"), "(c|g)"),
- (regex!("V"), "(a|c|g)"),
- (regex!("W"), "(a|t)"),
- (regex!("Y"), "(c|t)"),
- ];
- let mut seq = seq;
- for (re, replacement) in substs.into_iter() {
- seq = re.replace_all(seq.as_slice(), NoExpand(replacement));
- }
- seq.len()
- });
-
- let variants = vec![
- regex!("agggtaaa|tttaccct"),
- regex!("[cgt]gggtaaa|tttaccc[acg]"),
- regex!("a[act]ggtaaa|tttacc[agt]t"),
- regex!("ag[act]gtaaa|tttac[agt]ct"),
- regex!("agg[act]taaa|ttta[agt]cct"),
- regex!("aggg[acg]aaa|ttt[cgt]ccct"),
- regex!("agggt[cgt]aa|tt[acg]accct"),
- regex!("agggta[cgt]a|t[acg]taccct"),
- regex!("agggtaa[cgt]|[acg]ttaccct"),
- ];
- let (mut variant_strs, mut counts) = (vec!(), vec!());
- for variant in variants.into_iter() {
- let seq_arc_copy = seq_arc.clone();
- variant_strs.push(variant.to_string());
- counts.push(Future::spawn(move|| {
- count_matches(seq_arc_copy.as_slice(), &variant)
- }));
- }
-
- for (i, variant) in variant_strs.iter().enumerate() {
- println!("{} {}", variant, counts[i].get());
- }
- println!("");
- println!("{}", ilen);
- println!("{}", clen);
- println!("{}", seqlen.get());
-}
fn main() {
false == false == false;
- //~^ ERROR: Chained comparison operators require parentheses
+ //~^ ERROR: chained comparison operators require parentheses
false == 0 < 2;
- //~^ ERROR: Chained comparison operators require parentheses
+ //~^ ERROR: chained comparison operators require parentheses
f<X>();
- //~^ ERROR: Chained comparison operators require parentheses
- //~^^ HELP: use ::< instead of < if you meant to specify type arguments
+ //~^ ERROR: chained comparison operators require parentheses
+ //~^^ HELP: use `::<...>` instead of `<...>`
}
pub fn main() {
f<type>();
//~^ ERROR expected identifier, found keyword `type`
- //~^^ ERROR: Chained comparison operators require parentheses
- //~^^^ HELP: use ::< instead of < if you meant to specify type arguments
+ //~^^ ERROR: chained comparison
+ //~^^^ HELP: use `::<
}
--- /dev/null
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// aux-build:macro_with_super_1.rs
+
+#[macro_use]
+extern crate macro_with_super_1;
+
+declare!();
+
+fn main() {
+ bbb::ccc();
+}
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-// exec-env:RUST_LOG=rust-log-filter/f.o
+// exec-env:RUST_LOG=rust-log-filter/foo
#![allow(unknown_features)]
#![feature(box_syntax)]
let _t = Thread::spawn(move|| {
log::set_logger(logger);
- // our regex is "f.o"
- // ensure it is a regex, and isn't anchored
info!("foo");
info!("bar");
info!("foo bar");
info!("bar foo");
- info!("f1o");
});
assert_eq!(rx.recv().unwrap().as_slice(), "foo");
assert_eq!(rx.recv().unwrap().as_slice(), "foo bar");
assert_eq!(rx.recv().unwrap().as_slice(), "bar foo");
- assert_eq!(rx.recv().unwrap().as_slice(), "f1o");
assert!(rx.recv().is_err());
}