TARGET_CRATES := libc std flate arena term \
serialize getopts collections test rand \
- log regex graphviz core rbml alloc \
+ log graphviz core rbml alloc \
unicode rustc_bitflags
RUSTC_CRATES := rustc rustc_typeck rustc_borrowck rustc_resolve rustc_driver \
rustc_trans rustc_back rustc_llvm rustc_privacy
DEPS_getopts := std
DEPS_collections := core alloc unicode
DEPS_num := std
-DEPS_test := std getopts serialize rbml term regex native:rust_test_helpers
+DEPS_test := std getopts serialize rbml term native:rust_test_helpers
DEPS_rand := core
-DEPS_log := std regex
-DEPS_regex := std
+DEPS_log := std
DEPS_fmt_macros = std
TOOL_DEPS_compiletest := test getopts
TOOL_DEPS_rustdoc := rustdoc
TOOL_DEPS_rustc := rustc_driver
-TOOL_DEPS_rustbook := std regex rustdoc
+TOOL_DEPS_rustbook := std rustdoc
TOOL_SOURCE_compiletest := $(S)src/compiletest/compiletest.rs
TOOL_SOURCE_rustdoc := $(S)src/driver/driver.rs
TOOL_SOURCE_rustc := $(S)src/driver/driver.rs
$(filter-out rustc_driver, \
$(filter-out rustc_privacy, \
$(filter-out log, \
- $(filter-out regex, \
$(filter-out getopts, \
- $(filter-out syntax, $(CRATES))))))))))))
+ $(filter-out syntax, $(CRATES)))))))))))
COMPILER_DOC_CRATES := rustc rustc_trans rustc_borrowck rustc_resolve \
rustc_typeck rustc_driver syntax rustc_privacy
use std::fmt;
use std::str::FromStr;
-use regex::Regex;
#[derive(Clone, PartialEq, Debug)]
pub enum Mode {
pub run_ignored: bool,
// Only run tests that match this filter
- pub filter: Option<Regex>,
-
- // Precompiled regex for finding expected errors in cfail
- pub cfail_regex: Regex,
+ pub filter: Option<String>,
// Write out a parseable log of tests that were run
pub logfile: Option<Path>,
#[macro_use]
extern crate log;
-extern crate regex;
use std::os;
use std::io;
use common::Config;
use common::{Pretty, DebugInfoGdb, DebugInfoLldb, Codegen};
use util::logv;
-use regex::Regex;
pub mod procsrv;
pub mod util;
}
let filter = if !matches.free.is_empty() {
- let s = matches.free[0].as_slice();
- match regex::Regex::new(s) {
- Ok(re) => Some(re),
- Err(e) => {
- println!("failed to parse filter /{}/: {:?}", s, e);
- panic!()
- }
- }
+ Some(matches.free[0].clone())
} else {
None
};
.as_slice()).expect("invalid mode"),
run_ignored: matches.opt_present("ignored"),
filter: filter,
- cfail_regex: Regex::new(errors::EXPECTED_PATTERN).unwrap(),
logfile: matches.opt_str("logfile").map(|s| Path::new(s)),
runtool: matches.opt_str("runtool"),
host_rustcflags: matches.opt_str("host-rustcflags"),
if full_version_line.as_slice().trim().len() > 0 => {
let full_version_line = full_version_line.as_slice().trim();
- let re = Regex::new(r"(^|[^0-9])([0-9]\.[0-9])([^0-9]|$)").unwrap();
-
- match re.captures(full_version_line) {
- Some(captures) => {
- Some(captures.at(2).unwrap_or("").to_string())
+ // Scan for a standalone two-digit version "X.Y" — a digit, a '.', a digit,
+ // with no adjacent digits on either side; this hand-rolled loop replaces
+ // the old regex "(^|[^0-9])([0-9]\.[0-9])([^0-9]|$)"
+ for (pos, c) in full_version_line.char_indices() {
+ if !c.is_digit(10) { continue }
+ if pos + 2 >= full_version_line.len() { continue }
+ if full_version_line.char_at(pos + 1) != '.' { continue }
+ if !full_version_line.char_at(pos + 2).is_digit(10) { continue }
+ if pos > 0 && full_version_line.char_at_reverse(pos).is_digit(10) {
+ continue
}
- None => {
- println!("Could not extract GDB version from line '{}'",
- full_version_line);
- None
+ if pos + 3 < full_version_line.len() &&
+ full_version_line.char_at(pos + 3).is_digit(10) {
+ continue
}
+ return Some(full_version_line[pos..pos+3].to_string());
}
+ println!("Could not extract GDB version from line '{}'",
+ full_version_line);
+ None
},
_ => None
}
if full_version_line.as_slice().trim().len() > 0 => {
let full_version_line = full_version_line.as_slice().trim();
- let re = Regex::new(r"[Ll][Ll][Dd][Bb]-([0-9]+)").unwrap();
-
- match re.captures(full_version_line) {
- Some(captures) => {
- Some(captures.at(1).unwrap_or("").to_string())
- }
- None => {
- println!("Could not extract LLDB version from line '{}'",
- full_version_line);
- None
- }
+ for (pos, l) in full_version_line.char_indices() {
+ if l != 'l' && l != 'L' { continue }
+ if pos + 5 >= full_version_line.len() { continue }
+ let l = full_version_line.char_at(pos + 1);
+ if l != 'l' && l != 'L' { continue }
+ let d = full_version_line.char_at(pos + 2);
+ if d != 'd' && d != 'D' { continue }
+ let b = full_version_line.char_at(pos + 3);
+ if b != 'b' && b != 'B' { continue }
+ let dash = full_version_line.char_at(pos + 4);
+ if dash != '-' { continue }
+
+ let vers = full_version_line[pos + 5..].chars().take_while(|c| {
+ c.is_digit(10)
+ }).collect::<String>();
+ if vers.len() > 0 { return Some(vers) }
}
+ println!("Could not extract LLDB version from line '{}'",
+ full_version_line);
+ None
},
_ => None
}
// except according to those terms.
use self::WhichLine::*;
-use std::ascii::AsciiExt;
use std::io::{BufferedReader, File};
-use regex::Regex;
pub struct ExpectedError {
pub line: uint,
pub msg: String,
}
+#[derive(PartialEq, Show)]
+enum WhichLine { ThisLine, FollowPrevious(uint), AdjustBackward(uint) }
+
/// Looks for either "//~| KIND MESSAGE" or "//~^^... KIND MESSAGE"
/// The former is a "follow" that inherits its target from the preceding line;
/// the latter is an "adjusts" that goes that many lines up.
/// Goal is to enable tests both like: //~^^^ ERROR go up three
/// and also //~^ ERROR message one for the preceding line, and
/// //~| ERROR message two for that same line.
-
-pub static EXPECTED_PATTERN : &'static str =
- r"//~(?P<follow>\|)?(?P<adjusts>\^*)\s*(?P<kind>\S*)\s*(?P<msg>.*)";
-
-#[derive(PartialEq, Show)]
-enum WhichLine { ThisLine, FollowPrevious(uint), AdjustBackward(uint) }
-
// Load any test directives embedded in the file
-pub fn load_errors(re: &Regex, testfile: &Path) -> Vec<ExpectedError> {
+pub fn load_errors(testfile: &Path) -> Vec<ExpectedError> {
let mut rdr = BufferedReader::new(File::open(testfile).unwrap());
// `last_nonfollow_error` tracks the most recently seen
rdr.lines().enumerate().filter_map(|(line_no, ln)| {
parse_expected(last_nonfollow_error,
line_no + 1,
- ln.unwrap().as_slice(), re)
+ ln.unwrap().as_slice())
.map(|(which, error)| {
match which {
FollowPrevious(_) => {}
fn parse_expected(last_nonfollow_error: Option<uint>,
line_num: uint,
- line: &str,
- re: &Regex) -> Option<(WhichLine, ExpectedError)> {
- re.captures(line).and_then(|caps| {
- let adjusts = caps.name("adjusts").unwrap_or("").len();
- let kind = caps.name("kind").unwrap_or("").to_ascii_lowercase();
- let msg = caps.name("msg").unwrap_or("").trim().to_string();
- let follow = caps.name("follow").unwrap_or("").len() > 0;
+ line: &str) -> Option<(WhichLine, ExpectedError)> {
+ let start = match line.find_str("//~") { Some(i) => i, None => return None };
+ let (follow, adjusts) = if line.char_at(start + 3) == '|' {
+ (true, 0)
+ } else {
+ (false, line[start + 3..].chars().take_while(|c| *c == '^').count())
+ };
+ let kind_start = start + 3 + adjusts + (follow as usize);
+ let letters = line[kind_start..].chars();
+ let kind = letters.skip_while(|c| c.is_whitespace())
+ .take_while(|c| !c.is_whitespace())
+ .map(|c| c.to_lowercase())
+ .collect::<String>();
+ let letters = line[kind_start..].chars();
+ let msg = letters.skip_while(|c| c.is_whitespace())
+ .skip_while(|c| !c.is_whitespace())
+ .collect::<String>().trim().to_string();
- let (which, line) = if follow {
- assert!(adjusts == 0, "use either //~| or //~^, not both.");
- let line = last_nonfollow_error.unwrap_or_else(|| {
- panic!("encountered //~| without preceding //~^ line.")
- });
- (FollowPrevious(line), line)
- } else {
- let which =
- if adjusts > 0 { AdjustBackward(adjusts) } else { ThisLine };
- let line = line_num - adjusts;
- (which, line)
- };
+ let (which, line) = if follow {
+ assert!(adjusts == 0, "use either //~| or //~^, not both.");
+ let line = last_nonfollow_error.unwrap_or_else(|| {
+ panic!("encountered //~| without preceding //~^ line.")
+ });
+ (FollowPrevious(line), line)
+ } else {
+ let which =
+ if adjusts > 0 { AdjustBackward(adjusts) } else { ThisLine };
+ let line = line_num - adjusts;
+ (which, line)
+ };
- debug!("line={} which={:?} kind={:?} msg={:?}", line_num, which, kind, msg);
- Some((which, ExpectedError { line: line,
- kind: kind,
- msg: msg, }))
- })
+ debug!("line={} which={:?} kind={:?} msg={:?}", line_num, which, kind, msg);
+ Some((which, ExpectedError { line: line,
+ kind: kind,
+ msg: msg, }))
}
}
let output_to_check = get_output(props, &proc_res);
- let expected_errors = errors::load_errors(&config.cfail_regex, testfile);
+ let expected_errors = errors::load_errors(testfile);
if !expected_errors.is_empty() {
if !props.error_patterns.is_empty() {
fatal("both error pattern and expected errors specified");
extern crate syntax;
extern crate rustc;
-extern crate regex;
-
#[macro_use]
extern crate log;
use std::collections::HashMap;
use std::io::File;
-use regex::Regex;
use syntax::parse;
use syntax::parse::lexer;
}
fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>) -> TokenAndSpan {
- let re = Regex::new(
- r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]"
- ).unwrap();
-
- let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
- let start = m.name("start").unwrap_or("");
- let end = m.name("end").unwrap_or("");
- let toknum = m.name("toknum").unwrap_or("");
- let content = m.name("content").unwrap_or("");
+ // Extract seq/start/end/content/toknum fields by locating the fixed
+ // delimiters ("[@", ",", ":", "='", "',<", ">,"); replaces the old regex:
+ // \[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]
+ let start = s.find_str("[@").unwrap();
+ let comma = start + s[start..].find_str(",").unwrap();
+ let colon = comma + s[comma..].find_str(":").unwrap();
+ let content_start = colon + s[colon..].find_str("='").unwrap();
+ let content_end = content_start + s[content_start..].find_str("',<").unwrap();
+ let toknum_end = content_end + s[content_end..].find_str(">,").unwrap();
+
+ let start = &s[comma + 1 .. colon];
+ let end = &s[colon + 1 .. content_start];
+ let content = &s[content_start + 2 .. content_end];
+ let toknum = &s[content_end + 3 .. toknum_end];
let proto_tok = tokens.get(toknum).expect(format!("didn't find token {:?} in the map",
toknum).as_slice());
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-use regex::Regex;
use std::ascii::AsciiExt;
use std::cmp;
///
/// Valid log levels are 0-255, with the most likely ones being 1-4 (defined in
/// std::). Also supports string log levels of error, warn, info, and debug
-pub fn parse_logging_spec(spec: &str) -> (Vec<LogDirective>, Option<Regex>) {
+pub fn parse_logging_spec(spec: &str) -> (Vec<LogDirective>, Option<String>) {
let mut dirs = Vec::new();
let mut parts = spec.split('/');
});
}});
- let filter = filter.map_or(None, |filter| {
- match Regex::new(filter) {
- Ok(re) => Some(re),
- Err(e) => {
- println!("warning: invalid regex filter - {:?}", e);
- None
- }
- }
- });
-
- return (dirs, filter);
+ (dirs, filter.map(|s| s.to_string()))
}
#[cfg(test)]
//!
//! # Filtering results
//!
-//! A RUST_LOG directive may include a regex filter. The syntax is to append `/`
-//! followed by a regex. Each message is checked against the regex, and is only
-//! logged if it matches. Note that the matching is done after formatting the log
-//! string but before adding any logging meta-data. There is a single filter for all
-//! modules.
+//! A RUST_LOG directive may include a string filter. The syntax is to append
+//! `/` followed by a string. Each message is checked against the string and is
+//! only logged if it contains the string. Note that the matching is done after
+//! formatting the log string but before adding any logging meta-data. There is
+//! a single filter for all modules.
//!
//! Some examples:
//!
#![allow(unstable)]
#![deny(missing_docs)]
-extern crate regex;
-
use std::cell::RefCell;
use std::fmt;
use std::io::LineBufferedWriter;
use std::slice;
use std::sync::{Once, ONCE_INIT};
-use regex::Regex;
-
use directive::LOG_LEVEL_NAMES;
#[macro_use]
static mut DIRECTIVES: *const Vec<directive::LogDirective> =
0 as *const Vec<directive::LogDirective>;
-/// Optional regex filter.
-static mut FILTER: *const Regex = 0 as *const _;
+/// Optional filter: a log message is emitted only if it contains this string.
+static mut FILTER: *const String = 0 as *const _;
/// Debug log level
pub const DEBUG: u32 = 4;
// Test the literal string from args against the current filter, if there
// is one.
match unsafe { FILTER.as_ref() } {
- Some(filter) if !filter.is_match(&args.to_string()[]) => return,
+ Some(filter) if !args.to_string().contains(&filter[]) => return,
_ => {}
}
DIRECTIVES = ptr::null();
if !FILTER.is_null() {
- let _filter: Box<Regex> = mem::transmute(FILTER);
- FILTER = ptr::null();
+ let _filter: Box<String> = mem::transmute(FILTER);
+ FILTER = 0 as *const _;
}
});
}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// Enable this to squash warnings due to exporting pieces of the representation
-// for use with the regex! macro. See lib.rs for explanation.
-
-pub use self::Inst::*;
-
-use std::cmp;
-use std::iter::repeat;
-use parse;
-use parse::{
- Flags, FLAG_EMPTY,
- Nothing, Literal, Dot, AstClass, Begin, End, WordBoundary, Capture, Cat, Alt,
- Rep,
- ZeroOne, ZeroMore, OneMore,
-};
-
-type InstIdx = uint;
-
-#[derive(Show, Clone)]
-pub enum Inst {
- // When a Match instruction is executed, the current thread is successful.
- Match,
-
- // The OneChar instruction matches a literal character.
- // The flags indicate whether to do a case insensitive match.
- OneChar(char, Flags),
-
- // The CharClass instruction tries to match one input character against
- // the range of characters given.
- // The flags indicate whether to do a case insensitive match and whether
- // the character class is negated or not.
- CharClass(Vec<(char, char)>, Flags),
-
- // Matches any character except new lines.
- // The flags indicate whether to include the '\n' character.
- Any(Flags),
-
- // Matches the beginning of the string, consumes no characters.
- // The flags indicate whether it matches if the preceding character
- // is a new line.
- EmptyBegin(Flags),
-
- // Matches the end of the string, consumes no characters.
- // The flags indicate whether it matches if the proceeding character
- // is a new line.
- EmptyEnd(Flags),
-
- // Matches a word boundary (\w on one side and \W \A or \z on the other),
- // and consumes no character.
- // The flags indicate whether this matches a word boundary or something
- // that isn't a word boundary.
- EmptyWordBoundary(Flags),
-
- // Saves the current position in the input string to the Nth save slot.
- Save(uint),
-
- // Jumps to the instruction at the index given.
- Jump(InstIdx),
-
- // Jumps to the instruction at the first index given. If that leads to
- // a panic state, then the instruction at the second index given is
- // tried.
- Split(InstIdx, InstIdx),
-}
-
-/// Program represents a compiled regular expression. Once an expression is
-/// compiled, its representation is immutable and will never change.
-///
-/// All of the data in a compiled expression is wrapped in "MaybeStatic" or
-/// "MaybeOwned" types so that a `Program` can be represented as static data.
-/// (This makes it convenient and efficient for use with the `regex!` macro.)
-#[derive(Clone)]
-pub struct Program {
- /// A sequence of instructions.
- pub insts: Vec<Inst>,
- /// If the regular expression requires a literal prefix in order to have a
- /// match, that prefix is stored here. (It's used in the VM to implement
- /// an optimization.)
- pub prefix: String,
-}
-
-impl Program {
- /// Compiles a Regex given its AST.
- pub fn new(ast: parse::Ast) -> (Program, Vec<Option<String>>) {
- let mut c = Compiler {
- insts: Vec::with_capacity(100),
- names: Vec::with_capacity(10),
- };
-
- c.insts.push(Save(0));
- c.compile(ast);
- c.insts.push(Save(1));
- c.insts.push(Match);
-
- // Try to discover a literal string prefix.
- // This is a bit hacky since we have to skip over the initial
- // 'Save' instruction.
- let mut pre = String::with_capacity(5);
- for inst in c.insts[1..].iter() {
- match *inst {
- OneChar(c, FLAG_EMPTY) => pre.push(c),
- _ => break
- }
- }
-
- let Compiler { insts, names } = c;
- let prog = Program {
- insts: insts,
- prefix: pre,
- };
- (prog, names)
- }
-
- /// Returns the total number of capture groups in the regular expression.
- /// This includes the zeroth capture.
- pub fn num_captures(&self) -> uint {
- let mut n = 0;
- for inst in self.insts.iter() {
- match *inst {
- Save(c) => n = cmp::max(n, c+1),
- _ => {}
- }
- }
- // There's exactly 2 Save slots for every capture.
- n / 2
- }
-}
-
-struct Compiler<'r> {
- insts: Vec<Inst>,
- names: Vec<Option<String>>,
-}
-
-// The compiler implemented here is extremely simple. Most of the complexity
-// in this crate is in the parser or the VM.
-// The only tricky thing here is patching jump/split instructions to point to
-// the right instruction.
-impl<'r> Compiler<'r> {
- fn compile(&mut self, ast: parse::Ast) {
- match ast {
- Nothing => {},
- Literal(c, flags) => self.push(OneChar(c, flags)),
- Dot(nl) => self.push(Any(nl)),
- AstClass(ranges, flags) =>
- self.push(CharClass(ranges, flags)),
- Begin(flags) => self.push(EmptyBegin(flags)),
- End(flags) => self.push(EmptyEnd(flags)),
- WordBoundary(flags) => self.push(EmptyWordBoundary(flags)),
- Capture(cap, name, x) => {
- let len = self.names.len();
- if cap >= len {
- self.names.extend(repeat(None).take(10 + cap - len))
- }
- self.names[cap] = name;
-
- self.push(Save(2 * cap));
- self.compile(*x);
- self.push(Save(2 * cap + 1));
- }
- Cat(xs) => {
- for x in xs.into_iter() {
- self.compile(x)
- }
- }
- Alt(x, y) => {
- let split = self.empty_split(); // push: split 0, 0
- let j1 = self.insts.len();
- self.compile(*x); // push: insts for x
- let jmp = self.empty_jump(); // push: jmp 0
- let j2 = self.insts.len();
- self.compile(*y); // push: insts for y
- let j3 = self.insts.len();
-
- self.set_split(split, j1, j2); // split 0, 0 -> split j1, j2
- self.set_jump(jmp, j3); // jmp 0 -> jmp j3
- }
- Rep(x, ZeroOne, g) => {
- let split = self.empty_split();
- let j1 = self.insts.len();
- self.compile(*x);
- let j2 = self.insts.len();
-
- if g.is_greedy() {
- self.set_split(split, j1, j2);
- } else {
- self.set_split(split, j2, j1);
- }
- }
- Rep(x, ZeroMore, g) => {
- let j1 = self.insts.len();
- let split = self.empty_split();
- let j2 = self.insts.len();
- self.compile(*x);
- let jmp = self.empty_jump();
- let j3 = self.insts.len();
-
- self.set_jump(jmp, j1);
- if g.is_greedy() {
- self.set_split(split, j2, j3);
- } else {
- self.set_split(split, j3, j2);
- }
- }
- Rep(x, OneMore, g) => {
- let j1 = self.insts.len();
- self.compile(*x);
- let split = self.empty_split();
- let j2 = self.insts.len();
-
- if g.is_greedy() {
- self.set_split(split, j1, j2);
- } else {
- self.set_split(split, j2, j1);
- }
- }
- }
- }
-
- /// Appends the given instruction to the program.
- #[inline]
- fn push(&mut self, x: Inst) {
- self.insts.push(x)
- }
-
- /// Appends an *empty* `Split` instruction to the program and returns
- /// the index of that instruction. (The index can then be used to "patch"
- /// the actual locations of the split in later.)
- #[inline]
- fn empty_split(&mut self) -> InstIdx {
- self.insts.push(Split(0, 0));
- self.insts.len() - 1
- }
-
- /// Sets the left and right locations of a `Split` instruction at index
- /// `i` to `pc1` and `pc2`, respectively.
- /// If the instruction at index `i` isn't a `Split` instruction, then
- /// `panic!` is called.
- #[inline]
- fn set_split(&mut self, i: InstIdx, pc1: InstIdx, pc2: InstIdx) {
- let split = &mut self.insts[i];
- match *split {
- Split(_, _) => *split = Split(pc1, pc2),
- _ => panic!("BUG: Invalid split index."),
- }
- }
-
- /// Appends an *empty* `Jump` instruction to the program and returns the
- /// index of that instruction.
- #[inline]
- fn empty_jump(&mut self) -> InstIdx {
- self.insts.push(Jump(0));
- self.insts.len() - 1
- }
-
- /// Sets the location of a `Jump` instruction at index `i` to `pc`.
- /// If the instruction at index `i` isn't a `Jump` instruction, then
- /// `panic!` is called.
- #[inline]
- fn set_jump(&mut self, i: InstIdx, pc: InstIdx) {
- let jmp = &mut self.insts[i];
- match *jmp {
- Jump(_) => *jmp = Jump(pc),
- _ => panic!("BUG: Invalid jump index."),
- }
- }
-}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-//
-// ignore-lexer-test FIXME #15679
-
-//! Regular expressions implemented in Rust
-//!
-//! For official documentation, see the rust-lang/regex crate
-#![crate_name = "regex"]
-#![crate_type = "rlib"]
-#![crate_type = "dylib"]
-#![unstable = "use the crates.io `regex` library instead"]
-#![staged_api]
-#![doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
- html_favicon_url = "http://www.rust-lang.org/favicon.ico",
- html_root_url = "http://doc.rust-lang.org/nightly/",
- html_playground_url = "http://play.rust-lang.org/")]
-
-#![allow(unknown_features)]
-#![allow(unstable)]
-#![feature(slicing_syntax)]
-#![feature(box_syntax)]
-#![allow(unknown_features)] #![feature(int_uint)]
-#![deny(missing_docs)]
-
-#[cfg(test)]
-extern crate "test" as stdtest;
-#[cfg(test)]
-extern crate rand;
-
-// During tests, this links with the `regex` crate so that the `regex!` macro
-// can be tested.
-#[cfg(test)]
-extern crate regex;
-
-// Unicode tables for character classes are defined in libunicode
-extern crate unicode;
-
-pub use parse::Error;
-pub use re::{Regex, Captures, SubCaptures, SubCapturesPos};
-pub use re::{FindCaptures, FindMatches};
-pub use re::{Replacer, NoExpand, RegexSplits, RegexSplitsN};
-pub use re::{quote, is_match};
-
-mod compile;
-mod parse;
-mod re;
-mod vm;
-
-#[cfg(test)]
-mod test;
-
-/// The `native` module exists to support the `regex!` macro. Do not use.
-#[doc(hidden)]
-pub mod native {
- // Exporting this stuff is bad form, but it's necessary for two reasons.
- // Firstly, the `regex!` syntax extension is in a different crate and
- // requires access to the representation of a regex (particularly the
- // instruction set) in order to compile to native Rust. This could be
- // mitigated if `regex!` was defined in the same crate, but this has
- // undesirable consequences (such as requiring a dependency on
- // `libsyntax`).
- //
- // Secondly, the code generated by `regex!` must *also* be able
- // to access various functions in this crate to reduce code duplication
- // and to provide a value with precisely the same `Regex` type in this
- // crate. This, AFAIK, is impossible to mitigate.
- //
- // On the bright side, `rustdoc` lets us hide this from the public API
- // documentation.
- pub use compile::{
- Program,
- OneChar, CharClass, Any, Save, Jump, Split,
- Match, EmptyBegin, EmptyEnd, EmptyWordBoundary,
- };
- pub use parse::{
- FLAG_EMPTY, FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL,
- FLAG_SWAP_GREED, FLAG_NEGATED,
- };
- pub use re::{Dynamic, ExDynamic, Native, ExNative};
- pub use vm::{
- MatchKind, Exists, Location, Submatches,
- StepState, StepMatchEarlyReturn, StepMatch, StepContinue,
- CharReader, find_prefix,
- };
-}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-pub use self::Ast::*;
-pub use self::Repeater::*;
-pub use self::Greed::*;
-use self::BuildAst::*;
-
-use std::char;
-use std::cmp;
-use std::fmt;
-use std::iter;
-use std::num;
-
-/// Static data containing Unicode ranges for general categories and scripts.
-use unicode::regex::{UNICODE_CLASSES, PERLD, PERLS, PERLW};
-
-/// The maximum number of repetitions allowed with the `{n,m}` syntax.
-static MAX_REPEAT: uint = 1000;
-
-/// Error corresponds to something that can go wrong while parsing
-/// a regular expression.
-///
-/// (Once an expression is compiled, it is not possible to produce an error
-/// via searching, splitting or replacing.)
-#[derive(Show)]
-pub struct Error {
- /// The *approximate* character index of where the error occurred.
- pub pos: uint,
- /// A message describing the error.
- pub msg: String,
-}
-
-impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "Regex syntax error near position {}: {:?}",
- self.pos, self.msg)
- }
-}
-
-/// Represents the abstract syntax of a regular expression.
-/// It is showable so that error messages resulting from a bug can provide
-/// useful information.
-/// It is cloneable so that expressions can be repeated for the counted
-/// repetition feature. (No other copying is done.)
-///
-/// Note that this representation prevents one from reproducing the regex as
-/// it was typed. (But it could be used to reproduce an equivalent regex.)
-#[derive(Show, Clone)]
-pub enum Ast {
- Nothing,
- Literal(char, Flags),
- Dot(Flags),
- AstClass(Vec<(char, char)>, Flags),
- Begin(Flags),
- End(Flags),
- WordBoundary(Flags),
- Capture(uint, Option<String>, Box<Ast>),
- // Represent concatenation as a flat vector to avoid blowing the
- // stack in the compiler.
- Cat(Vec<Ast>),
- Alt(Box<Ast>, Box<Ast>),
- Rep(Box<Ast>, Repeater, Greed),
-}
-
-#[derive(Show, PartialEq, Clone)]
-pub enum Repeater {
- ZeroOne,
- ZeroMore,
- OneMore,
-}
-
-#[derive(Copy, Show, Clone)]
-pub enum Greed {
- Greedy,
- Ungreedy,
-}
-
-impl Greed {
- pub fn is_greedy(&self) -> bool {
- match *self {
- Greedy => true,
- _ => false,
- }
- }
-
- fn swap(self, swapped: bool) -> Greed {
- if !swapped { return self }
- match self {
- Greedy => Ungreedy,
- Ungreedy => Greedy,
- }
- }
-}
-
-/// BuildAst is a regrettable type that represents intermediate state for
-/// constructing an abstract syntax tree. Its central purpose is to facilitate
-/// parsing groups and alternations while also maintaining a stack of flag
-/// state.
-#[derive(Show)]
-enum BuildAst {
- Expr(Ast),
- Paren(Flags, uint, String), // '('
- Bar, // '|'
-}
-
-impl BuildAst {
- fn paren(&self) -> bool {
- match *self {
- Paren(_, _, _) => true,
- _ => false,
- }
- }
-
- fn flags(&self) -> Flags {
- match *self {
- Paren(flags, _, _) => flags,
- _ => panic!("Cannot get flags from {:?}", self),
- }
- }
-
- fn capture(&self) -> Option<uint> {
- match *self {
- Paren(_, 0, _) => None,
- Paren(_, c, _) => Some(c),
- _ => panic!("Cannot get capture group from {:?}", self),
- }
- }
-
- fn capture_name(&self) -> Option<String> {
- match *self {
- Paren(_, 0, _) => None,
- Paren(_, _, ref name) => {
- if name.len() == 0 {
- None
- } else {
- Some(name.clone())
- }
- }
- _ => panic!("Cannot get capture name from {:?}", self),
- }
- }
-
- fn bar(&self) -> bool {
- match *self {
- Bar => true,
- _ => false,
- }
- }
-
- fn unwrap(self) -> Result<Ast, Error> {
- match self {
- Expr(x) => Ok(x),
- _ => panic!("Tried to unwrap non-AST item: {:?}", self),
- }
- }
-}
-
-/// Flags represents all options that can be twiddled by a user in an
-/// expression.
-pub type Flags = u8;
-
-pub const FLAG_EMPTY: u8 = 0;
-pub const FLAG_NOCASE: u8 = 1 << 0; // i
-pub const FLAG_MULTI: u8 = 1 << 1; // m
-pub const FLAG_DOTNL: u8 = 1 << 2; // s
-pub const FLAG_SWAP_GREED: u8 = 1 << 3; // U
-pub const FLAG_NEGATED: u8 = 1 << 4; // char class or not word boundary
-
-struct Parser<'a> {
- // The input, parsed only as a sequence of UTF8 code points.
- chars: Vec<char>,
- // The index of the current character in the input.
- chari: uint,
- // The intermediate state representing the AST.
- stack: Vec<BuildAst>,
- // The current set of flags.
- flags: Flags,
- // The total number of capture groups.
- // Incremented each time an opening left paren is seen (assuming it is
- // opening a capture group).
- caps: uint,
- // A set of all capture group names used only to detect duplicates.
- names: Vec<String>,
-}
-
-pub fn parse(s: &str) -> Result<Ast, Error> {
- Parser {
- chars: s.chars().collect(),
- chari: 0,
- stack: vec!(),
- flags: FLAG_EMPTY,
- caps: 0,
- names: vec!(),
- }.parse()
-}
-
-impl<'a> Parser<'a> {
- fn parse(&mut self) -> Result<Ast, Error> {
- if self.chars.len() == 0 {
- return Ok(Nothing);
- }
- loop {
- let c = self.cur();
- match c {
- '?' | '*' | '+' => try!(self.push_repeater(c)),
- '\\' => {
- let ast = try!(self.parse_escape());
- self.push(ast)
- }
- '{' => try!(self.parse_counted()),
- '[' => match self.try_parse_ascii() {
- None => try!(self.parse_class()),
- Some(class) => self.push(class),
- },
- '(' => {
- if self.peek_is(1, '?') {
- try!(self.expect('?'));
- try!(self.parse_group_opts())
- } else {
- self.caps += 1;
- self.stack.push(Paren(self.flags,
- self.caps,
- "".to_string()))
- }
- }
- ')' => {
- let catfrom = try!(
- self.pos_last(false, |x| x.paren() || x.bar()));
- try!(self.concat(catfrom));
-
- let altfrom = try!(self.pos_last(false, |x| x.paren()));
- // Before we smush the alternates together and pop off the
- // left paren, let's grab the old flags and see if we
- // need a capture.
- let (cap, cap_name, oldflags) = {
- let paren = &self.stack[altfrom-1];
- (paren.capture(), paren.capture_name(), paren.flags())
- };
- try!(self.alternate(altfrom));
- self.flags = oldflags;
-
- // If this was a capture, pop what we just pushed in
- // alternate and make it a capture.
- if cap.is_some() {
- let ast = try!(self.pop_ast());
- self.push(Capture(cap.unwrap(), cap_name, box ast));
- }
- }
- '|' => {
- let catfrom = try!(
- self.pos_last(true, |x| x.paren() || x.bar()));
- try!(self.concat(catfrom));
-
- self.stack.push(Bar);
- }
- _ => try!(self.push_literal(c)),
- }
- if !self.next_char() {
- break
- }
- }
-
- // Try to improve error handling. At this point, there should be
- // no remaining open parens.
- if self.stack.iter().any(|x| x.paren()) {
- return self.err("Unclosed parenthesis.")
- }
- let catfrom = try!(self.pos_last(true, |x| x.bar()));
- try!(self.concat(catfrom));
- try!(self.alternate(0));
-
- assert!(self.stack.len() == 1);
- self.pop_ast()
- }
-
- fn noteof(&mut self, expected: &str) -> Result<(), Error> {
- match self.next_char() {
- true => Ok(()),
- false => {
- self.err(&format!("Expected {:?} but got EOF.",
- expected)[])
- }
- }
- }
-
- fn expect(&mut self, expected: char) -> Result<(), Error> {
- match self.next_char() {
- true if self.cur() == expected => Ok(()),
- true => self.err(&format!("Expected '{:?}' but got '{:?}'.",
- expected, self.cur())[]),
- false => {
- self.err(&format!("Expected '{:?}' but got EOF.",
- expected)[])
- }
- }
- }
-
- fn next_char(&mut self) -> bool {
- self.chari += 1;
- self.chari < self.chars.len()
- }
-
- fn pop_ast(&mut self) -> Result<Ast, Error> {
- match self.stack.pop().unwrap().unwrap() {
- Err(e) => Err(e),
- Ok(ast) => Ok(ast),
- }
- }
-
- fn push(&mut self, ast: Ast) {
- self.stack.push(Expr(ast))
- }
-
- fn push_repeater(&mut self, c: char) -> Result<(), Error> {
- match self.stack.last() {
- Some(&Expr(..)) => (),
- // self.stack is empty, or the top item is not an Expr
- _ => return self.err("A repeat operator must be preceded by a valid expression."),
- }
- let rep: Repeater = match c {
- '?' => ZeroOne, '*' => ZeroMore, '+' => OneMore,
- _ => panic!("Not a valid repeater operator."),
- };
-
- match self.peek(1) {
- Some('*') | Some('+') =>
- return self.err(
- "Double repeat operators are not supported."),
- _ => {},
- }
- let ast = try!(self.pop_ast());
- match ast {
- Begin(_) | End(_) | WordBoundary(_) =>
- return self.err(
- "Repeat arguments cannot be empty width assertions."),
- _ => {}
- }
- let greed = try!(self.get_next_greedy());
- self.push(Rep(box ast, rep, greed));
- Ok(())
- }
-
- fn push_literal(&mut self, c: char) -> Result<(), Error> {
- let flags = self.flags;
- match c {
- '.' => {
- self.push(Dot(flags))
- }
- '^' => {
- self.push(Begin(flags))
- }
- '$' => {
- self.push(End(flags))
- }
- _ => {
- self.push(Literal(c, flags))
- }
- }
- Ok(())
- }
-
- // Parses all forms of character classes.
- // Assumes that '[' is the current character.
- fn parse_class(&mut self) -> Result<(), Error> {
- let negated =
- if self.peek_is(1, '^') {
- try!(self.expect('^'));
- FLAG_NEGATED
- } else {
- FLAG_EMPTY
- };
- let mut ranges: Vec<(char, char)> = vec!();
- let mut alts: Vec<Ast> = vec!();
-
- while self.peek_is(1, '-') {
- try!(self.expect('-'));
- ranges.push(('-', '-'))
- }
- loop {
- try!(self.noteof("a closing ']' or a non-empty character class)"));
- let mut c = self.cur();
- match c {
- '[' =>
- match self.try_parse_ascii() {
- Some(AstClass(asciis, flags)) => {
- alts.push(AstClass(asciis, flags ^ negated));
- continue
- }
- Some(ast) =>
- panic!("Expected Class AST but got '{:?}'", ast),
- // Just drop down and try to add as a regular character.
- None => {},
- },
- '\\' => {
- match try!(self.parse_escape()) {
- AstClass(asciis, flags) => {
- alts.push(AstClass(asciis, flags ^ negated));
- continue
- }
- Literal(c2, _) => c = c2, // process below
- Begin(_) | End(_) | WordBoundary(_) =>
- return self.err(
- "\\A, \\z, \\b and \\B are not valid escape \
- sequences inside a character class."),
- ast => panic!("Unexpected AST item '{:?}'", ast),
- }
- }
- ']' if ranges.len() > 0 || alts.len() > 0 => {
- if ranges.len() > 0 {
- let flags = negated | (self.flags & FLAG_NOCASE);
- let mut ast = AstClass(combine_ranges(ranges), flags);
- for alt in alts.into_iter() {
- ast = Alt(box alt, box ast)
- }
- self.push(ast);
- } else if alts.len() > 0 {
- let mut ast = alts.pop().unwrap();
- for alt in alts.into_iter() {
- ast = Alt(box alt, box ast)
- }
- self.push(ast);
- }
- return Ok(())
- }
- _ => {}
- }
-
- if self.peek_is(1, '-') && !self.peek_is(2, ']') {
- try!(self.expect('-'));
- // The regex can't end here.
- try!(self.noteof("not a ']'"));
- // End the range with a single character or character escape.
- let mut c2 = self.cur();
- if c2 == '\\' {
- match try!(self.parse_escape()) {
- Literal(c3, _) => c2 = c3, // allow literal escapes below
- ast =>
- return self.err(&format!("Expected a literal, but got {:?}.",
- ast)[]),
- }
- }
- if c2 < c {
- return self.err(&format!("Invalid character class \
- range '{}-{}'",
- c,
- c2)[])
- }
- ranges.push((c, self.cur()))
- } else {
- ranges.push((c, c))
- }
- }
- }
-
- // Tries to parse an ASCII character class of the form [:name:].
- // If successful, returns an AST character class corresponding to name
- // and moves the parser to the final ']' character.
- // If unsuccessful, no state is changed and None is returned.
- // Assumes that '[' is the current character.
- fn try_parse_ascii(&mut self) -> Option<Ast> {
- if !self.peek_is(1, ':') {
- return None
- }
- let closer =
- match self.pos(']') {
- Some(i) => i,
- None => return None,
- };
- if self.chars[closer-1] != ':' {
- return None
- }
- if closer - self.chari <= 3 {
- return None
- }
- let mut name_start = self.chari + 2;
- let negated =
- if self.peek_is(2, '^') {
- name_start += 1;
- FLAG_NEGATED
- } else {
- FLAG_EMPTY
- };
- let name = self.slice(name_start, closer - 1);
- match find_class(ASCII_CLASSES, &name[]) {
- None => None,
- Some(ranges) => {
- self.chari = closer;
- let flags = negated | (self.flags & FLAG_NOCASE);
- Some(AstClass(combine_ranges(ranges), flags))
- }
- }
- }
-
- // Parses counted repetition. Supports:
- // {n}, {n,}, {n,m}, {n}?, {n,}? and {n,m}?
- // Assumes that '{' is the current character.
- // Returns either an error or moves the parser to the final '}' character.
- // (Or the '?' character if not greedy.)
- fn parse_counted(&mut self) -> Result<(), Error> {
- // Scan until the closing '}' and grab the stuff in {}.
- let start = self.chari;
- let closer =
- match self.pos('}') {
- Some(i) => i,
- None => {
- return self.err(&format!("No closing brace for counted \
- repetition starting at position \
- {:?}.",
- start)[])
- }
- };
- self.chari = closer;
- let greed = try!(self.get_next_greedy());
- let inner = self.chars[start+1..closer].iter().cloned()
- .collect::<String>();
-
- // Parse the min and max values from the regex.
- let (mut min, mut max): (uint, Option<uint>);
- if !inner.contains(",") {
- min = try!(self.parse_uint(&inner[]));
- max = Some(min);
- } else {
- let pieces: Vec<&str> = inner.splitn(1, ',').collect();
- let (smin, smax) = (pieces[0], pieces[1]);
- if smin.len() == 0 {
- return self.err("Max repetitions cannot be specified \
- without min repetitions.")
- }
- min = try!(self.parse_uint(smin));
- max =
- if smax.len() == 0 {
- None
- } else {
- Some(try!(self.parse_uint(smax)))
- };
- }
-
- // Do some bounds checking and make sure max >= min.
- if min > MAX_REPEAT {
- return self.err(&format!(
- "{} exceeds maximum allowed repetitions ({})",
- min, MAX_REPEAT)[]);
- }
- if max.is_some() {
- let m = max.unwrap();
- if m > MAX_REPEAT {
- return self.err(&format!(
- "{} exceeds maximum allowed repetitions ({})",
- m, MAX_REPEAT)[]);
- }
- if m < min {
- return self.err(&format!(
- "Max repetitions ({}) cannot be smaller than min \
- repetitions ({}).", m, min)[]);
- }
- }
-
- // Now manipulate the AST be repeating elements.
- if max.is_none() {
- // Require N copies of what's on the stack and then repeat it.
- let ast = try!(self.pop_ast());
- for _ in iter::range(0, min) {
- self.push(ast.clone())
- }
- self.push(Rep(box ast, ZeroMore, greed));
- } else {
- // Require N copies of what's on the stack and then repeat it
- // up to M times optionally.
- let ast = try!(self.pop_ast());
- for _ in iter::range(0, min) {
- self.push(ast.clone())
- }
- if max.is_some() {
- for _ in iter::range(min, max.unwrap()) {
- self.push(Rep(box ast.clone(), ZeroOne, greed))
- }
- }
- // It's possible that we popped something off the stack but
- // never put anything back on it. To keep things simple, add
- // a no-op expression.
- if min == 0 && (max.is_none() || max == Some(0)) {
- self.push(Nothing)
- }
- }
- Ok(())
- }
-
- // Parses all escape sequences.
- // Assumes that '\' is the current character.
- fn parse_escape(&mut self) -> Result<Ast, Error> {
- try!(self.noteof("an escape sequence following a '\\'"));
-
- let c = self.cur();
- if is_punct(c) {
- return Ok(Literal(c, FLAG_EMPTY))
- }
- match c {
- 'a' => Ok(Literal('\x07', FLAG_EMPTY)),
- 'f' => Ok(Literal('\x0C', FLAG_EMPTY)),
- 't' => Ok(Literal('\t', FLAG_EMPTY)),
- 'n' => Ok(Literal('\n', FLAG_EMPTY)),
- 'r' => Ok(Literal('\r', FLAG_EMPTY)),
- 'v' => Ok(Literal('\x0B', FLAG_EMPTY)),
- 'A' => Ok(Begin(FLAG_EMPTY)),
- 'z' => Ok(End(FLAG_EMPTY)),
- 'b' => Ok(WordBoundary(FLAG_EMPTY)),
- 'B' => Ok(WordBoundary(FLAG_NEGATED)),
- '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => Ok(try!(self.parse_octal())),
- 'x' => Ok(try!(self.parse_hex())),
- 'p' | 'P' => Ok(try!(self.parse_unicode_name())),
- 'd' | 'D' | 's' | 'S' | 'w' | 'W' => {
- let ranges = perl_unicode_class(c);
- let mut flags = self.flags & FLAG_NOCASE;
- if c.is_uppercase() { flags |= FLAG_NEGATED }
- Ok(AstClass(ranges, flags))
- }
- _ => {
- self.err(&format!("Invalid escape sequence '\\\\{}'", c)[])
- }
- }
- }
-
- // Parses a Unicode character class name, either of the form \pF where
- // F is a one letter Unicode class name or of the form \p{name} where
- // name is the Unicode class name.
- // Assumes that \p or \P has been read (and 'p' or 'P' is the current
- // character).
- fn parse_unicode_name(&mut self) -> Result<Ast, Error> {
- let negated = if self.cur() == 'P' { FLAG_NEGATED } else { FLAG_EMPTY };
- let mut name: String;
- if self.peek_is(1, '{') {
- try!(self.expect('{'));
- let closer =
- match self.pos('}') {
- Some(i) => i,
- None => return self.err(&format!(
- "Missing '}}' for unclosed '{{' at position {}",
- self.chari)[]),
- };
- if closer - self.chari + 1 == 0 {
- return self.err("No Unicode class name found.")
- }
- name = self.slice(self.chari + 1, closer);
- self.chari = closer;
- } else {
- if self.chari + 1 >= self.chars.len() {
- return self.err("No single letter Unicode class name found.")
- }
- name = self.slice(self.chari + 1, self.chari + 2);
- self.chari += 1;
- }
- match find_class(UNICODE_CLASSES, &name[]) {
- None => {
- return self.err(&format!("Could not find Unicode class '{}'",
- name)[])
- }
- Some(ranges) => {
- Ok(AstClass(ranges, negated | (self.flags & FLAG_NOCASE)))
- }
- }
- }
-
- // Parses an octal number, up to 3 digits.
- // Assumes that \n has been read, where n is the first digit.
- fn parse_octal(&mut self) -> Result<Ast, Error> {
- let start = self.chari;
- let mut end = start + 1;
- let (d2, d3) = (self.peek(1), self.peek(2));
- if d2 >= Some('0') && d2 <= Some('7') {
- try!(self.noteof("expected octal character in [0-7]"));
- end += 1;
- if d3 >= Some('0') && d3 <= Some('7') {
- try!(self.noteof("expected octal character in [0-7]"));
- end += 1;
- }
- }
- let s = self.slice(start, end);
- match num::from_str_radix::<u32>(&s[], 8) {
- Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
- None => {
- self.err(&format!("Could not parse '{:?}' as octal number.",
- s)[])
- }
- }
- }
-
- // Parse a hex number. Either exactly two digits or anything in {}.
- // Assumes that \x has been read.
- fn parse_hex(&mut self) -> Result<Ast, Error> {
- if !self.peek_is(1, '{') {
- try!(self.expect('{'));
- return self.parse_hex_two()
- }
- let start = self.chari + 2;
- let closer =
- match self.pos('}') {
- None => {
- return self.err(&format!("Missing '}}' for unclosed \
- '{{' at position {}",
- start)[])
- }
- Some(i) => i,
- };
- self.chari = closer;
- self.parse_hex_digits(&self.slice(start, closer)[])
- }
-
- // Parses a two-digit hex number.
- // Assumes that \xn has been read, where n is the first digit and is the
- // current character.
- // After return, parser will point at the second digit.
- fn parse_hex_two(&mut self) -> Result<Ast, Error> {
- let (start, end) = (self.chari, self.chari + 2);
- let bad = self.slice(start - 2, self.chars.len());
- try!(self.noteof(format!("Invalid hex escape sequence '{}'",
- bad).as_slice()));
- self.parse_hex_digits(self.slice(start, end).as_slice())
- }
-
- // Parses `s` as a hexadecimal number.
- fn parse_hex_digits(&self, s: &str) -> Result<Ast, Error> {
- match num::from_str_radix::<u32>(s, 16) {
- Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
- None => {
- self.err(&format!("Could not parse '{}' as hex number.", s)[])
- }
- }
- }
-
- // Parses a named capture.
- // Assumes that '(?P<' has been consumed and that the current character
- // is '<'.
- // When done, parser will be at the closing '>' character.
- fn parse_named_capture(&mut self) -> Result<(), Error> {
- try!(self.noteof("a capture name"));
- let closer =
- match self.pos('>') {
- Some(i) => i,
- None => return self.err("Capture name must end with '>'."),
- };
- if closer - self.chari == 0 {
- return self.err("Capture names must have at least 1 character.")
- }
- let name = self.slice(self.chari, closer);
- if !name.chars().all(is_valid_cap) {
- return self.err(
- "Capture names can only have underscores, letters and digits.")
- }
- if self.names.contains(&name) {
- return self.err(&format!("Duplicate capture group name '{}'.",
- name)[])
- }
- self.names.push(name.clone());
- self.chari = closer;
- self.caps += 1;
- self.stack.push(Paren(self.flags, self.caps, name));
- Ok(())
- }
-
- // Parses non-capture groups and options.
- // Assumes that '(?' has already been consumed and '?' is the current
- // character.
- fn parse_group_opts(&mut self) -> Result<(), Error> {
- if self.peek_is(1, 'P') && self.peek_is(2, '<') {
- try!(self.expect('P'));
- try!(self.expect('<'));
- return self.parse_named_capture()
- }
- let start = self.chari;
- let mut flags = self.flags;
- let mut sign = 1i;
- let mut saw_flag = false;
- loop {
- try!(self.noteof(
- "expected non-empty set of flags or closing ')'"));
- match self.cur() {
- 'i' => { flags = flags | FLAG_NOCASE; saw_flag = true},
- 'm' => { flags = flags | FLAG_MULTI; saw_flag = true},
- 's' => { flags = flags | FLAG_DOTNL; saw_flag = true},
- 'U' => { flags = flags | FLAG_SWAP_GREED; saw_flag = true},
- '-' => {
- if sign < 0 {
- return self.err(&format!(
- "Cannot negate flags twice in '{}'.",
- self.slice(start, self.chari + 1))[])
- }
- sign = -1;
- saw_flag = false;
- flags = flags ^ flags;
- }
- ':' | ')' => {
- if sign < 0 {
- if !saw_flag {
- return self.err(&format!(
- "A valid flag does not follow negation in '{}'",
- self.slice(start, self.chari + 1))[])
- }
- flags = flags ^ flags;
- }
- if self.cur() == ':' {
- // Save the old flags with the opening paren.
- self.stack.push(Paren(self.flags, 0, "".to_string()));
- }
- self.flags = flags;
- return Ok(())
- }
- _ => return self.err(&format!(
- "Unrecognized flag '{}'.", self.cur())[]),
- }
- }
- }
-
- // Peeks at the next character and returns whether it's ungreedy or not.
- // If it is, then the next character is consumed.
- fn get_next_greedy(&mut self) -> Result<Greed, Error> {
- Ok(if self.peek_is(1, '?') {
- try!(self.expect('?'));
- Ungreedy
- } else {
- Greedy
- }.swap(self.flags & FLAG_SWAP_GREED > 0))
- }
-
- // Searches the stack (starting at the top) until it finds an expression
- // for which `pred` returns true. The index of that expression in the
- // stack is returned.
- // If there's no match, then one of two things happens depending on the
- // values of `allow_start`. When it's true, then `0` will be returned.
- // Otherwise, an error will be returned.
- // Generally, `allow_start` is only true when you're *not* expecting an
- // opening parenthesis.
- fn pos_last<P>(&self, allow_start: bool, pred: P) -> Result<uint, Error> where
- P: FnMut(&BuildAst) -> bool,
- {
- let from = match self.stack.iter().rev().position(pred) {
- Some(i) => i,
- None => {
- if allow_start {
- self.stack.len()
- } else {
- return self.err("No matching opening parenthesis.")
- }
- }
- };
- // Adjust index since 'from' is for the reversed stack.
- // Also, don't include the '(' or '|'.
- Ok(self.stack.len() - from)
- }
-
- // concat starts at `from` in the parser's stack and concatenates all
- // expressions up to the top of the stack. The resulting concatenation is
- // then pushed on to the stack.
- // Usually `from` corresponds to the position of an opening parenthesis,
- // a '|' (alternation) or the start of the entire expression.
- fn concat(&mut self, from: uint) -> Result<(), Error> {
- let ast = try!(self.build_from(from, concat_flatten));
- self.push(ast);
- Ok(())
- }
-
- // concat starts at `from` in the parser's stack and alternates all
- // expressions up to the top of the stack. The resulting alternation is
- // then pushed on to the stack.
- // Usually `from` corresponds to the position of an opening parenthesis
- // or the start of the entire expression.
- // This will also drop any opening parens or alternation bars found in
- // the intermediate AST.
- fn alternate(&mut self, mut from: uint) -> Result<(), Error> {
- // Unlike in the concatenation case, we want 'build_from' to continue
- // all the way to the opening left paren (so it will be popped off and
- // thrown away). But be careful with overflow---we can't count on the
- // open paren to be there.
- if from > 0 { from = from - 1}
- let ast = try!(self.build_from(from, |l,r| Alt(box l, box r)));
- self.push(ast);
- Ok(())
- }
-
- // build_from combines all AST elements starting at 'from' in the
- // parser's stack using 'mk' to combine them. If any such element is not an
- // AST then it is popped off the stack and ignored.
- fn build_from<F>(&mut self, from: uint, mut mk: F) -> Result<Ast, Error> where
- F: FnMut(Ast, Ast) -> Ast,
- {
- if from >= self.stack.len() {
- return self.err("Empty group or alternate not allowed.")
- }
-
- let mut combined = try!(self.pop_ast());
- let mut i = self.stack.len();
- while i > from {
- i = i - 1;
- match self.stack.pop().unwrap() {
- Expr(x) => combined = mk(x, combined),
- _ => {},
- }
- }
- Ok(combined)
- }
-
- fn parse_uint(&self, s: &str) -> Result<uint, Error> {
- match s.parse::<uint>() {
- Some(i) => Ok(i),
- None => {
- self.err(&format!("Expected an unsigned integer but got '{}'.",
- s)[])
- }
- }
- }
-
- fn char_from_u32(&self, n: u32) -> Result<char, Error> {
- match char::from_u32(n) {
- Some(c) => Ok(c),
- None => {
- self.err(&format!("Could not decode '{}' to unicode \
- character.", n)[])
- }
- }
- }
-
- fn pos(&self, c: char) -> Option<uint> {
- self.chars.iter()
- .skip(self.chari).position(|&c2| c2 == c).map(|i| self.chari + i)
- }
-
- fn err<T>(&self, msg: &str) -> Result<T, Error> {
- Err(Error {
- pos: self.chari,
- msg: msg.to_string(),
- })
- }
-
- fn peek(&self, offset: uint) -> Option<char> {
- if self.chari + offset >= self.chars.len() {
- return None
- }
- Some(self.chars[self.chari + offset])
- }
-
- fn peek_is(&self, offset: uint, is: char) -> bool {
- self.peek(offset) == Some(is)
- }
-
- fn cur(&self) -> char {
- self.chars[self.chari]
- }
-
- fn slice(&self, start: uint, end: uint) -> String {
- self.chars[start..end].iter().cloned().collect()
- }
-}
-
-// Given an unordered collection of character ranges, combine_ranges returns
-// an ordered sequence of character ranges where no two ranges overlap. They
-// are ordered from least to greatest (using start position).
-fn combine_ranges(unordered: Vec<(char, char)>) -> Vec<(char, char)> {
- // Returns true iff the two character classes overlap or share a boundary.
- // e.g., ('a', 'g') and ('h', 'm') would return true.
- fn should_merge((a, b): (char, char), (x, y): (char, char)) -> bool {
- cmp::max(a, x) as u32 <= cmp::min(b, y) as u32 + 1
- }
-
- // This is currently O(n^2), but I think with sufficient cleverness,
- // it can be reduced to O(n) **if necessary**.
- let mut ordered: Vec<(char, char)> = Vec::with_capacity(unordered.len());
- for (us, ue) in unordered.into_iter() {
- let (mut us, mut ue) = (us, ue);
- assert!(us <= ue);
- let mut which: Option<uint> = None;
- for (i, &(os, oe)) in ordered.iter().enumerate() {
- if should_merge((us, ue), (os, oe)) {
- us = cmp::min(us, os);
- ue = cmp::max(ue, oe);
- which = Some(i);
- break
- }
- }
- match which {
- None => ordered.push((us, ue)),
- Some(i) => ordered[i] = (us, ue),
- }
- }
- ordered.sort();
- ordered
-}
-
-// Constructs a Unicode friendly Perl character class from \d, \s or \w
-// (or any of their negated forms). Note that this does not handle negation.
-fn perl_unicode_class(which: char) -> Vec<(char, char)> {
- match which.to_lowercase() {
- 'd' => PERLD.to_vec(),
- 's' => PERLS.to_vec(),
- 'w' => PERLW.to_vec(),
- _ => unreachable!(),
- }
-}
-
-// Returns a concatenation of two expressions. This also guarantees that a
-// `Cat` expression will never be a direct child of another `Cat` expression.
-fn concat_flatten(x: Ast, y: Ast) -> Ast {
- match (x, y) {
- (Cat(mut xs), Cat(ys)) => { xs.extend(ys.into_iter()); Cat(xs) }
- (Cat(mut xs), ast) => { xs.push(ast); Cat(xs) }
- (ast, Cat(mut xs)) => { xs.insert(0, ast); Cat(xs) }
- (ast1, ast2) => Cat(vec!(ast1, ast2)),
- }
-}
-
-pub fn is_punct(c: char) -> bool {
- match c {
- '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' |
- '[' | ']' | '{' | '}' | '^' | '$' => true,
- _ => false,
- }
-}
-
-fn is_valid_cap(c: char) -> bool {
- c == '_' || (c >= '0' && c <= '9')
- || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-}
-
-fn find_class(classes: NamedClasses, name: &str) -> Option<Vec<(char, char)>> {
- match classes.binary_search_by(|&(s, _)| s.cmp(name)) {
- Ok(i) => Some(classes[i].1.to_vec()),
- Err(_) => None,
- }
-}
-
-type Class = &'static [(char, char)];
-type NamedClasses = &'static [(&'static str, &'static Class)];
-
-static ASCII_CLASSES: NamedClasses = &[
- // Classes must be in alphabetical order so that bsearch works.
- // [:alnum:] alphanumeric (== [0-9A-Za-z])
- // [:alpha:] alphabetic (== [A-Za-z])
- // [:ascii:] ASCII (== [\x00-\x7F])
- // [:blank:] blank (== [\t ])
- // [:cntrl:] control (== [\x00-\x1F\x7F])
- // [:digit:] digits (== [0-9])
- // [:graph:] graphical (== [!-~])
- // [:lower:] lower case (== [a-z])
- // [:print:] printable (== [ -~] == [ [:graph:]])
- // [:punct:] punctuation (== [!-/:-@[-`{-~])
- // [:space:] whitespace (== [\t\n\v\f\r ])
- // [:upper:] upper case (== [A-Z])
- // [:word:] word characters (== [0-9A-Za-z_])
- // [:xdigit:] hex digit (== [0-9A-Fa-f])
- // Taken from: http://golang.org/pkg/regex/syntax/
- ("alnum", &ALNUM),
- ("alpha", &ALPHA),
- ("ascii", &ASCII),
- ("blank", &BLANK),
- ("cntrl", &CNTRL),
- ("digit", &DIGIT),
- ("graph", &GRAPH),
- ("lower", &LOWER),
- ("print", &PRINT),
- ("punct", &PUNCT),
- ("space", &SPACE),
- ("upper", &UPPER),
- ("word", &WORD),
- ("xdigit", &XDIGIT),
-];
-
-static ALNUM: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z')];
-static ALPHA: Class = &[('A', 'Z'), ('a', 'z')];
-static ASCII: Class = &[('\x00', '\x7F')];
-static BLANK: Class = &[(' ', ' '), ('\t', '\t')];
-static CNTRL: Class = &[('\x00', '\x1F'), ('\x7F', '\x7F')];
-static DIGIT: Class = &[('0', '9')];
-static GRAPH: Class = &[('!', '~')];
-static LOWER: Class = &[('a', 'z')];
-static PRINT: Class = &[(' ', '~')];
-static PUNCT: Class = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')];
-static SPACE: Class = &[('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'),
- ('\x0C', '\x0C'), ('\r', '\r'), (' ', ' ')];
-static UPPER: Class = &[('A', 'Z')];
-static WORD: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z'), ('_', '_')];
-static XDIGIT: Class = &[('0', '9'), ('A', 'F'), ('a', 'f')];
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-pub use self::NamesIter::*;
-pub use self::Regex::*;
-
-use std::borrow::IntoCow;
-use std::collections::HashMap;
-use std::fmt;
-use std::string::CowString;
-
-use compile::Program;
-use parse;
-use vm;
-use vm::{CaptureLocs, MatchKind, Exists, Location, Submatches};
-
-/// Escapes all regular expression meta characters in `text`.
-///
-/// The string returned may be safely used as a literal in a regular
-/// expression.
-pub fn quote(text: &str) -> String {
- let mut quoted = String::with_capacity(text.len());
- for c in text.chars() {
- if parse::is_punct(c) {
- quoted.push('\\')
- }
- quoted.push(c);
- }
- quoted
-}
-
-/// Tests if the given regular expression matches somewhere in the text given.
-///
-/// If there was a problem compiling the regular expression, an error is
-/// returned.
-///
-/// To find submatches, split or replace text, you'll need to compile an
-/// expression first.
-///
-/// Note that you should prefer the `regex!` macro when possible. For example,
-/// `regex!("...").is_match("...")`.
-pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
- Regex::new(regex).map(|r| r.is_match(text))
-}
-
-/// A compiled regular expression
-#[derive(Clone)]
-pub enum Regex {
- // The representation of `Regex` is exported to support the `regex!`
- // syntax extension. Do not rely on it.
- //
- // See the comments for the `program` module in `lib.rs` for a more
- // detailed explanation for what `regex!` requires.
- #[doc(hidden)]
- Dynamic(ExDynamic),
- #[doc(hidden)]
- Native(ExNative),
-}
-
-#[derive(Clone)]
-#[doc(hidden)]
-pub struct ExDynamic {
- original: String,
- names: Vec<Option<String>>,
- #[doc(hidden)]
- pub prog: Program
-}
-
-#[doc(hidden)]
-#[derive(Copy)]
-pub struct ExNative {
- #[doc(hidden)]
- pub original: &'static str,
- #[doc(hidden)]
- pub names: &'static &'static [Option<&'static str>],
- #[doc(hidden)]
- pub prog: fn(MatchKind, &str, uint, uint) -> Vec<Option<uint>>
-}
-
-impl Clone for ExNative {
- fn clone(&self) -> ExNative {
- *self
- }
-}
-
-impl fmt::Display for Regex {
- /// Shows the original regular expression.
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- fmt::Display::fmt(self.as_str(), f)
- }
-}
-
-impl Regex {
- /// Compiles a dynamic regular expression. Once compiled, it can be
- /// used repeatedly to search, split or replace text in a string.
- ///
- /// When possible, you should prefer the `regex!` macro since it is
- /// safer and always faster.
- ///
- /// If an invalid expression is given, then an error is returned.
- pub fn new(re: &str) -> Result<Regex, parse::Error> {
- let ast = try!(parse::parse(re));
- let (prog, names) = Program::new(ast);
- Ok(Dynamic(ExDynamic {
- original: re.to_string(),
- names: names,
- prog: prog,
- }))
- }
-
- /// Returns true if and only if the regex matches the string given.
- pub fn is_match(&self, text: &str) -> bool {
- has_match(&exec(self, Exists, text))
- }
-
- /// Returns the start and end byte range of the leftmost-first match in
- /// `text`. If no match exists, then `None` is returned.
- pub fn find(&self, text: &str) -> Option<(uint, uint)> {
- let caps = exec(self, Location, text);
- if has_match(&caps) {
- Some((caps[0].unwrap(), caps[1].unwrap()))
- } else {
- None
- }
- }
-
- /// Returns an iterator for each successive non-overlapping match in
- /// `text`, returning the start and end byte indices with respect to
- /// `text`.
- pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> {
- FindMatches {
- re: self,
- search: text,
- last_end: 0,
- last_match: None,
- }
- }
-
- /// Returns the capture groups corresponding to the leftmost-first
- /// match in `text`. Capture group `0` always corresponds to the entire
- /// match. If no match is found, then `None` is returned.
- ///
- /// You should only use `captures` if you need access to submatches.
- /// Otherwise, `find` is faster for discovering the location of the overall
- /// match.
- pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
- let caps = exec(self, Submatches, text);
- Captures::new(self, text, caps)
- }
-
- /// Returns an iterator over all the non-overlapping capture groups matched
- /// in `text`. This is operationally the same as `find_iter` (except it
- /// yields information about submatches).
- pub fn captures_iter<'r, 't>(&'r self, text: &'t str)
- -> FindCaptures<'r, 't> {
- FindCaptures {
- re: self,
- search: text,
- last_match: None,
- last_end: 0,
- }
- }
-
- /// Returns an iterator of substrings of `text` delimited by a match
- /// of the regular expression.
- /// Namely, each element of the iterator corresponds to text that *isn't*
- /// matched by the regular expression.
- ///
- /// This method will *not* copy the text given.
- pub fn split<'r, 't>(&'r self, text: &'t str) -> RegexSplits<'r, 't> {
- RegexSplits {
- finder: self.find_iter(text),
- last: 0,
- }
- }
-
- /// Returns an iterator of at most `limit` substrings of `text` delimited
- /// by a match of the regular expression. (A `limit` of `0` will return no
- /// substrings.)
- /// Namely, each element of the iterator corresponds to text that *isn't*
- /// matched by the regular expression.
- /// The remainder of the string that is not split will be the last element
- /// in the iterator.
- ///
- /// This method will *not* copy the text given.
- pub fn splitn<'r, 't>(&'r self, text: &'t str, limit: uint)
- -> RegexSplitsN<'r, 't> {
- RegexSplitsN {
- splits: self.split(text),
- cur: 0,
- limit: limit,
- }
- }
-
- /// Replaces the leftmost-first match with the replacement provided.
- /// The replacement can be a regular string (where `$N` and `$name` are
- /// expanded to match capture groups) or a function that takes the matches'
- /// `Captures` and returns the replaced string.
- ///
- /// If no match is found, then a copy of the string is returned unchanged.
- pub fn replace<R: Replacer>(&self, text: &str, rep: R) -> String {
- self.replacen(text, 1, rep)
- }
-
- /// Replaces all non-overlapping matches in `text` with the
- /// replacement provided. This is the same as calling `replacen` with
- /// `limit` set to `0`.
- ///
- /// See the documentation for `replace` for details on how to access
- /// submatches in the replacement string.
- pub fn replace_all<R: Replacer>(&self, text: &str, rep: R) -> String {
- self.replacen(text, 0, rep)
- }
-
- /// Replaces at most `limit` non-overlapping matches in `text` with the
- /// replacement provided. If `limit` is 0, then all non-overlapping matches
- /// are replaced.
- ///
- /// See the documentation for `replace` for details on how to access
- /// submatches in the replacement string.
- pub fn replacen<R: Replacer>
- (&self, text: &str, limit: uint, mut rep: R) -> String {
- let mut new = String::with_capacity(text.len());
- let mut last_match = 0u;
-
- for (i, cap) in self.captures_iter(text).enumerate() {
- // It'd be nicer to use the 'take' iterator instead, but it seemed
- // awkward given that '0' => no limit.
- if limit > 0 && i >= limit {
- break
- }
-
- let (s, e) = cap.pos(0).unwrap(); // captures only reports matches
- new.push_str(&text[last_match..s]);
- new.push_str(&rep.reg_replace(&cap)[]);
- last_match = e;
- }
- new.push_str(&text[last_match..text.len()]);
- return new;
- }
-
- /// Returns the original string of this regex.
- pub fn as_str<'a>(&'a self) -> &'a str {
- match *self {
- Dynamic(ExDynamic { ref original, .. }) => &original[],
- Native(ExNative { ref original, .. }) => &original[],
- }
- }
-
- #[doc(hidden)]
- #[unstable]
- pub fn names_iter<'a>(&'a self) -> NamesIter<'a> {
- match *self {
- Native(ref n) => NamesIterNative(n.names.iter()),
- Dynamic(ref d) => NamesIterDynamic(d.names.iter())
- }
- }
-
- fn names_len(&self) -> uint {
- match *self {
- Native(ref n) => n.names.len(),
- Dynamic(ref d) => d.names.len()
- }
- }
-
-}
-
-#[derive(Clone)]
-pub enum NamesIter<'a> {
- NamesIterNative(::std::slice::Iter<'a, Option<&'static str>>),
- NamesIterDynamic(::std::slice::Iter<'a, Option<String>>)
-}
-
-impl<'a> Iterator for NamesIter<'a> {
- type Item = Option<String>;
-
- fn next(&mut self) -> Option<Option<String>> {
- match *self {
- NamesIterNative(ref mut i) => i.next().map(|x| x.map(|s| s.to_string())),
- NamesIterDynamic(ref mut i) => i.next().map(|x| x.as_ref().map(|s| s.to_string())),
- }
- }
-}
-
-/// NoExpand indicates literal string replacement.
-///
-/// It can be used with `replace` and `replace_all` to do a literal
-/// string replacement without expanding `$name` to their corresponding
-/// capture groups.
-///
-/// `'r` is the lifetime of the literal text.
-pub struct NoExpand<'t>(pub &'t str);
-
-/// Replacer describes types that can be used to replace matches in a string.
-pub trait Replacer {
- /// Returns a possibly owned string that is used to replace the match
- /// corresponding to the `caps` capture group.
- ///
- /// The `'a` lifetime refers to the lifetime of a borrowed string when
- /// a new owned string isn't needed (e.g., for `NoExpand`).
- fn reg_replace<'a>(&'a mut self, caps: &Captures) -> CowString<'a>;
-}
-
-impl<'t> Replacer for NoExpand<'t> {
- fn reg_replace<'a>(&'a mut self, _: &Captures) -> CowString<'a> {
- let NoExpand(s) = *self;
- s.into_cow()
- }
-}
-
-impl<'t> Replacer for &'t str {
- fn reg_replace<'a>(&'a mut self, caps: &Captures) -> CowString<'a> {
- caps.expand(*self).into_cow()
- }
-}
-
-impl<F> Replacer for F where F: FnMut(&Captures) -> String {
- fn reg_replace<'a>(&'a mut self, caps: &Captures) -> CowString<'a> {
- (*self)(caps).into_cow()
- }
-}
-
-/// Yields all substrings delimited by a regular expression match.
-///
-/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
-/// of the string being split.
-#[derive(Clone)]
-pub struct RegexSplits<'r, 't> {
- finder: FindMatches<'r, 't>,
- last: uint,
-}
-
-impl<'r, 't> Iterator for RegexSplits<'r, 't> {
- type Item = &'t str;
-
- fn next(&mut self) -> Option<&'t str> {
- let text = self.finder.search;
- match self.finder.next() {
- None => {
- if self.last >= text.len() {
- None
- } else {
- let s = &text[self.last..text.len()];
- self.last = text.len();
- Some(s)
- }
- }
- Some((s, e)) => {
- let matched = &text[self.last..s];
- self.last = e;
- Some(matched)
- }
- }
- }
-}
-
-/// Yields at most `N` substrings delimited by a regular expression match.
-///
-/// The last substring will be whatever remains after splitting.
-///
-/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
-/// of the string being split.
-#[derive(Clone)]
-pub struct RegexSplitsN<'r, 't> {
- splits: RegexSplits<'r, 't>,
- cur: uint,
- limit: uint,
-}
-
-impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
- type Item = &'t str;
-
- fn next(&mut self) -> Option<&'t str> {
- let text = self.splits.finder.search;
- if self.cur >= self.limit {
- None
- } else {
- self.cur += 1;
- if self.cur >= self.limit {
- Some(&text[self.splits.last..text.len()])
- } else {
- self.splits.next()
- }
- }
- }
-}
-
-/// Captures represents a group of captured strings for a single match.
-///
-/// The 0th capture always corresponds to the entire match. Each subsequent
-/// index corresponds to the next capture group in the regex.
-/// If a capture group is named, then the matched string is *also* available
-/// via the `name` method. (Note that the 0th capture is always unnamed and so
-/// must be accessed with the `at` method.)
-///
-/// Positions returned from a capture group are always byte indices.
-///
-/// `'t` is the lifetime of the matched text.
-pub struct Captures<'t> {
- text: &'t str,
- locs: CaptureLocs,
- named: Option<HashMap<String, uint>>,
-}
-
-impl<'t> Captures<'t> {
- #[allow(unstable)]
- fn new(re: &Regex, search: &'t str, locs: CaptureLocs)
- -> Option<Captures<'t>> {
- if !has_match(&locs) {
- return None
- }
-
- let named =
- if re.names_len() == 0 {
- None
- } else {
- let mut named = HashMap::new();
- for (i, name) in re.names_iter().enumerate() {
- match name {
- None => {},
- Some(name) => {
- named.insert(name, i);
- }
- }
- }
- Some(named)
- };
- Some(Captures {
- text: search,
- locs: locs,
- named: named,
- })
- }
-
- /// Returns the start and end positions of the Nth capture group.
- /// Returns `None` if `i` is not a valid capture group or if the capture
- /// group did not match anything.
- /// The positions returned are *always* byte indices with respect to the
- /// original string matched.
- pub fn pos(&self, i: uint) -> Option<(uint, uint)> {
- let (s, e) = (i * 2, i * 2 + 1);
- if e >= self.locs.len() || self.locs[s].is_none() {
- // VM guarantees that each pair of locations are both Some or None.
- return None
- }
- Some((self.locs[s].unwrap(), self.locs[e].unwrap()))
- }
-
- /// Returns the matched string for the capture group `i`. If `i` isn't
- /// a valid capture group or didn't match anything, then `None` is
- /// returned.
- pub fn at(&self, i: uint) -> Option<&'t str> {
- match self.pos(i) {
- None => None,
- Some((s, e)) => Some(&self.text[s.. e])
- }
- }
-
- /// Returns the matched string for the capture group named `name`. If
- /// `name` isn't a valid capture group or didn't match anything, then
- /// `None` is returned.
- pub fn name(&self, name: &str) -> Option<&'t str> {
- match self.named {
- None => None,
- Some(ref h) => {
- match h.get(name) {
- None => None,
- Some(i) => self.at(*i),
- }
- }
- }
- }
-
- /// Creates an iterator of all the capture groups in order of appearance
- /// in the regular expression.
- pub fn iter(&'t self) -> SubCaptures<'t> {
- SubCaptures { idx: 0, caps: self, }
- }
-
- /// Creates an iterator of all the capture group positions in order of
- /// appearance in the regular expression. Positions are byte indices
- /// in terms of the original string matched.
- pub fn iter_pos(&'t self) -> SubCapturesPos<'t> {
- SubCapturesPos { idx: 0, caps: self, }
- }
-
- /// Expands all instances of `$name` in `text` to the corresponding capture
- /// group `name`.
- ///
- /// `name` may be an integer corresponding to the index of the
- /// capture group (counted by order of opening parenthesis where `0` is the
- /// entire match) or it can be a name (consisting of letters, digits or
- /// underscores) corresponding to a named capture group.
- ///
- /// If `name` isn't a valid capture group (whether the name doesn't exist or
- /// isn't a valid index), then it is replaced with the empty string.
- ///
- /// To write a literal `$` use `$$`.
- pub fn expand(&self, text: &str) -> String {
- // How evil can you get?
- // FIXME: Don't use regexes for this. It's completely unnecessary.
- let re = Regex::new(r"(^|[^$]|\b)\$(\w+)").unwrap();
- let text = re.replace_all(text, |&mut: refs: &Captures| -> String {
- let pre = refs.at(1).unwrap_or("");
- let name = refs.at(2).unwrap_or("");
- format!("{}{}", pre,
- match name.parse::<uint>() {
- None => self.name(name).unwrap_or("").to_string(),
- Some(i) => self.at(i).unwrap_or("").to_string(),
- })
- });
- let re = Regex::new(r"\$\$").unwrap();
- re.replace_all(&text[], NoExpand("$"))
- }
-
- /// Returns the number of captured groups.
- #[inline]
- pub fn len(&self) -> uint { self.locs.len() / 2 }
-
- /// Returns if there are no captured groups.
- #[inline]
- pub fn is_empty(&self) -> bool { self.len() == 0 }
-}
-
-/// An iterator over capture groups for a particular match of a regular
-/// expression.
-///
-/// `'t` is the lifetime of the matched text.
-#[derive(Clone)]
-pub struct SubCaptures<'t> {
- idx: uint,
- caps: &'t Captures<'t>,
-}
-
-impl<'t> Iterator for SubCaptures<'t> {
- type Item = &'t str;
-
- fn next(&mut self) -> Option<&'t str> {
- if self.idx < self.caps.len() {
- self.idx += 1;
- Some(self.caps.at(self.idx - 1).unwrap_or(""))
- } else {
- None
- }
- }
-}
-
-/// An iterator over capture group positions for a particular match of a
-/// regular expression.
-///
-/// Positions are byte indices in terms of the original string matched.
-///
-/// `'t` is the lifetime of the matched text.
-#[derive(Clone)]
-pub struct SubCapturesPos<'t> {
- idx: uint,
- caps: &'t Captures<'t>,
-}
-
-impl<'t> Iterator for SubCapturesPos<'t> {
- type Item = Option<(uint, uint)>;
-
- fn next(&mut self) -> Option<Option<(uint, uint)>> {
- if self.idx < self.caps.len() {
- self.idx += 1;
- Some(self.caps.pos(self.idx - 1))
- } else {
- None
- }
- }
-}
-
-/// An iterator that yields all non-overlapping capture groups matching a
-/// particular regular expression.
-///
-/// The iterator stops when no more matches can be found.
-///
-/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
-/// of the matched string.
-#[derive(Clone)]
-pub struct FindCaptures<'r, 't> {
- re: &'r Regex,
- search: &'t str,
- last_match: Option<uint>,
- last_end: uint,
-}
-
-impl<'r, 't> Iterator for FindCaptures<'r, 't> {
- type Item = Captures<'t>;
-
- fn next(&mut self) -> Option<Captures<'t>> {
- if self.last_end > self.search.len() {
- return None
- }
-
- let caps = exec_slice(self.re, Submatches, self.search,
- self.last_end, self.search.len());
- let (s, e) =
- if !has_match(&caps) {
- return None
- } else {
- (caps[0].unwrap(), caps[1].unwrap())
- };
-
- // Don't accept empty matches immediately following a match.
- // i.e., no infinite loops please.
- if e == s && Some(self.last_end) == self.last_match {
- self.last_end += 1;
- return self.next()
- }
- self.last_end = e;
- self.last_match = Some(self.last_end);
- Captures::new(self.re, self.search, caps)
- }
-}
-
-/// An iterator over all non-overlapping matches for a particular string.
-///
-/// The iterator yields a tuple of integers corresponding to the start and end
-/// of the match. The indices are byte offsets. The iterator stops when no more
-/// matches can be found.
-///
-/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
-/// of the matched string.
-#[derive(Clone)]
-pub struct FindMatches<'r, 't> {
- re: &'r Regex,
- search: &'t str,
- last_match: Option<uint>,
- last_end: uint,
-}
-
-impl<'r, 't> Iterator for FindMatches<'r, 't> {
- type Item = (uint, uint);
-
- fn next(&mut self) -> Option<(uint, uint)> {
- if self.last_end > self.search.len() {
- return None
- }
-
- let caps = exec_slice(self.re, Location, self.search,
- self.last_end, self.search.len());
- let (s, e) =
- if !has_match(&caps) {
- return None
- } else {
- (caps[0].unwrap(), caps[1].unwrap())
- };
-
- // Don't accept empty matches immediately following a match.
- // i.e., no infinite loops please.
- if e == s && Some(self.last_end) == self.last_match {
- self.last_end += 1;
- return self.next()
- }
- self.last_end = e;
- self.last_match = Some(self.last_end);
- Some((s, e))
- }
-}
-
-fn exec(re: &Regex, which: MatchKind, input: &str) -> CaptureLocs {
- exec_slice(re, which, input, 0, input.len())
-}
-
-fn exec_slice(re: &Regex, which: MatchKind,
- input: &str, s: uint, e: uint) -> CaptureLocs {
- match *re {
- Dynamic(ExDynamic { ref prog, .. }) => vm::run(which, prog, input, s, e),
- Native(ExNative { ref prog, .. }) => (*prog)(which, input, s, e),
- }
-}
-
-#[inline]
-fn has_match(caps: &CaptureLocs) -> bool {
- caps.len() >= 2 && caps[0].is_some() && caps[1].is_some()
-}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-#![allow(non_snake_case)]
-
-use std::rand::{Rng, thread_rng};
-use stdtest::Bencher;
-use std::iter::repeat;
-
-use regex::{Regex, NoExpand};
-
-fn bench_assert_match(b: &mut Bencher, re: Regex, text: &str) {
- b.iter(|| if !re.is_match(text) { panic!("no match") });
-}
-
-#[bench]
-fn no_exponential(b: &mut Bencher) {
- let n = 100;
- let re = Regex::new(format!("{}{}",
- repeat("a?").take(n).collect::<String>(),
- repeat("a").take(n).collect::<String>()).as_slice()).unwrap();
- let text = repeat("a").take(n).collect::<String>();
- bench_assert_match(b, re, text.as_slice());
-}
-
-#[bench]
-fn literal(b: &mut Bencher) {
- let re = regex!("y");
- let text = format!("{}y", repeat("x").take(50).collect::<String>());
- bench_assert_match(b, re, text.as_slice());
-}
-
-#[bench]
-fn not_literal(b: &mut Bencher) {
- let re = regex!(".y");
- let text = format!("{}y", repeat("x").take(50).collect::<String>());
- bench_assert_match(b, re, text.as_slice());
-}
-
-#[bench]
-fn match_class(b: &mut Bencher) {
- let re = regex!("[abcdw]");
- let text = format!("{}w", repeat("xxxx").take(20).collect::<String>());
- bench_assert_match(b, re, text.as_slice());
-}
-
-#[bench]
-fn match_class_in_range(b: &mut Bencher) {
- // 'b' is between 'a' and 'c', so the class range checking doesn't help.
- let re = regex!("[ac]");
- let text = format!("{}c", repeat("bbbb").take(20).collect::<String>());
- bench_assert_match(b, re, text.as_slice());
-}
-
-#[bench]
-fn replace_all(b: &mut Bencher) {
- let re = regex!("[cjrw]");
- let text = "abcdefghijklmnopqrstuvwxyz";
- // FIXME: This isn't using the $name expand stuff.
- // It's possible RE2/Go is using it, but currently, the expand in this
- // crate is actually compiling a regex, so it's incredibly slow.
- b.iter(|| re.replace_all(text, NoExpand("")));
-}
-
-#[bench]
-fn anchored_literal_short_non_match(b: &mut Bencher) {
- let re = regex!("^zbc(d|e)");
- let text = "abcdefghijklmnopqrstuvwxyz";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn anchored_literal_long_non_match(b: &mut Bencher) {
- let re = regex!("^zbc(d|e)");
- let text = repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>();
- b.iter(|| re.is_match(text.as_slice()));
-}
-
-#[bench]
-fn anchored_literal_short_match(b: &mut Bencher) {
- let re = regex!("^.bc(d|e)");
- let text = "abcdefghijklmnopqrstuvwxyz";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn anchored_literal_long_match(b: &mut Bencher) {
- let re = regex!("^.bc(d|e)");
- let text = repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>();
- b.iter(|| re.is_match(text.as_slice()));
-}
-
-#[bench]
-fn one_pass_short_a(b: &mut Bencher) {
- let re = regex!("^.bc(d|e)*$");
- let text = "abcddddddeeeededd";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn one_pass_short_a_not(b: &mut Bencher) {
- let re = regex!(".bc(d|e)*$");
- let text = "abcddddddeeeededd";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn one_pass_short_b(b: &mut Bencher) {
- let re = regex!("^.bc(?:d|e)*$");
- let text = "abcddddddeeeededd";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn one_pass_short_b_not(b: &mut Bencher) {
- let re = regex!(".bc(?:d|e)*$");
- let text = "abcddddddeeeededd";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn one_pass_long_prefix(b: &mut Bencher) {
- let re = regex!("^abcdefghijklmnopqrstuvwxyz.*$");
- let text = "abcdefghijklmnopqrstuvwxyz";
- b.iter(|| re.is_match(text));
-}
-
-#[bench]
-fn one_pass_long_prefix_not(b: &mut Bencher) {
- let re = regex!("^.bcdefghijklmnopqrstuvwxyz.*$");
- let text = "abcdefghijklmnopqrstuvwxyz";
- b.iter(|| re.is_match(text));
-}
-
-macro_rules! throughput {
- ($name:ident, $regex:expr, $size:expr) => (
- #[bench]
- fn $name(b: &mut Bencher) {
- let text = gen_text($size);
- b.bytes = $size;
- b.iter(|| if $regex.is_match(text.as_slice()) { panic!("match") });
- }
- );
-}
-
-fn easy0() -> Regex { regex!("ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
-fn easy1() -> Regex { regex!("A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$") }
-fn medium() -> Regex { regex!("[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
-fn hard() -> Regex { regex!("[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
-
-fn gen_text(n: uint) -> String {
- let mut rng = thread_rng();
- let mut bytes = rng.gen_ascii_chars().map(|n| n as u8).take(n)
- .collect::<Vec<u8>>();
- for (i, b) in bytes.iter_mut().enumerate() {
- if i % 20 == 0 {
- *b = b'\n'
- }
- }
- String::from_utf8(bytes).unwrap()
-}
-
-throughput!{easy0_32, easy0(), 32}
-throughput!{easy0_1K, easy0(), 1<<10}
-throughput!{easy0_32K, easy0(), 32<<10}
-
-throughput!{easy1_32, easy1(), 32}
-throughput!{easy1_1K, easy1(), 1<<10}
-throughput!{easy1_32K, easy1(), 32<<10}
-
-throughput!{medium_32, medium(), 32}
-throughput!{medium_1K, medium(), 1<<10}
-throughput!{medium_32K,medium(), 32<<10}
-
-throughput!{hard_32, hard(), 32}
-throughput!{hard_1K, hard(), 1<<10}
-throughput!{hard_32K,hard(), 32<<10}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// ignore-tidy-linelength
-
-// DO NOT EDIT. Automatically generated by 'src/etc/regex-match-tests'
-// on 2014-04-23 01:33:36.539280.
-
-// Tests from basic.dat
-mat!{match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18))}
-mat!{match_basic_4, r"a...b", r"abababbb", Some((2, 7))}
-mat!{match_basic_5, r"XXXXXX", r"..XXXXXX", Some((2, 8))}
-mat!{match_basic_6, r"\)", r"()", Some((1, 2))}
-mat!{match_basic_7, r"a]", r"a]a", Some((0, 2))}
-mat!{match_basic_9, r"\}", r"}", Some((0, 1))}
-mat!{match_basic_10, r"\]", r"]", Some((0, 1))}
-mat!{match_basic_12, r"]", r"]", Some((0, 1))}
-mat!{match_basic_15, r"^a", r"ax", Some((0, 1))}
-mat!{match_basic_16, r"\^a", r"a^a", Some((1, 3))}
-mat!{match_basic_17, r"a\^", r"a^", Some((0, 2))}
-mat!{match_basic_18, r"a$", r"aa", Some((1, 2))}
-mat!{match_basic_19, r"a\$", r"a$", Some((0, 2))}
-mat!{match_basic_20, r"^$", r"", Some((0, 0))}
-mat!{match_basic_21, r"$^", r"", Some((0, 0))}
-mat!{match_basic_22, r"a($)", r"aa", Some((1, 2)), Some((2, 2))}
-mat!{match_basic_23, r"a*(^a)", r"aa", Some((0, 1)), Some((0, 1))}
-mat!{match_basic_24, r"(..)*(...)*", r"a", Some((0, 0))}
-mat!{match_basic_25, r"(..)*(...)*", r"abcd", Some((0, 4)), Some((2, 4))}
-mat!{match_basic_26, r"(ab|a)(bc|c)", r"abc", Some((0, 3)), Some((0, 2)), Some((2, 3))}
-mat!{match_basic_27, r"(ab)c|abc", r"abc", Some((0, 3)), Some((0, 2))}
-mat!{match_basic_28, r"a{0}b", r"ab", Some((1, 2))}
-mat!{match_basic_29, r"(a*)(b?)(b+)b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7))}
-mat!{match_basic_30, r"(a*)(b{0,1})(b{1,})b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7))}
-mat!{match_basic_32, r"((a|a)|a)", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1))}
-mat!{match_basic_33, r"(a*)(a|aa)", r"aaaa", Some((0, 4)), Some((0, 3)), Some((3, 4))}
-mat!{match_basic_34, r"a*(a.|aa)", r"aaaa", Some((0, 4)), Some((2, 4))}
-mat!{match_basic_35, r"a(b)|c(d)|a(e)f", r"aef", Some((0, 3)), None, None, Some((1, 2))}
-mat!{match_basic_36, r"(a|b)?.*", r"b", Some((0, 1)), Some((0, 1))}
-mat!{match_basic_37, r"(a|b)c|a(b|c)", r"ac", Some((0, 2)), Some((0, 1))}
-mat!{match_basic_38, r"(a|b)c|a(b|c)", r"ab", Some((0, 2)), None, Some((1, 2))}
-mat!{match_basic_39, r"(a|b)*c|(a|ab)*c", r"abc", Some((0, 3)), Some((1, 2))}
-mat!{match_basic_40, r"(a|b)*c|(a|ab)*c", r"xc", Some((1, 2))}
-mat!{match_basic_41, r"(.a|.b).*|.*(.a|.b)", r"xa", Some((0, 2)), Some((0, 2))}
-mat!{match_basic_42, r"a?(ab|ba)ab", r"abab", Some((0, 4)), Some((0, 2))}
-mat!{match_basic_43, r"a?(ac{0}b|ba)ab", r"abab", Some((0, 4)), Some((0, 2))}
-mat!{match_basic_44, r"ab|abab", r"abbabab", Some((0, 2))}
-mat!{match_basic_45, r"aba|bab|bba", r"baaabbbaba", Some((5, 8))}
-mat!{match_basic_46, r"aba|bab", r"baaabbbaba", Some((6, 9))}
-mat!{match_basic_47, r"(aa|aaa)*|(a|aaaaa)", r"aa", Some((0, 2)), Some((0, 2))}
-mat!{match_basic_48, r"(a.|.a.)*|(a|.a...)", r"aa", Some((0, 2)), Some((0, 2))}
-mat!{match_basic_49, r"ab|a", r"xabc", Some((1, 3))}
-mat!{match_basic_50, r"ab|a", r"xxabc", Some((2, 4))}
-mat!{match_basic_51, r"(?i)(Ab|cD)*", r"aBcD", Some((0, 4)), Some((2, 4))}
-mat!{match_basic_52, r"[^-]", r"--a", Some((2, 3))}
-mat!{match_basic_53, r"[a-]*", r"--a", Some((0, 3))}
-mat!{match_basic_54, r"[a-m-]*", r"--amoma--", Some((0, 4))}
-mat!{match_basic_55, r":::1:::0:|:::1:1:0:", r":::0:::1:::1:::0:", Some((8, 17))}
-mat!{match_basic_56, r":::1:::0:|:::1:1:1:", r":::0:::1:::1:::0:", Some((8, 17))}
-mat!{match_basic_57, r"[[:upper:]]", r"A", Some((0, 1))}
-mat!{match_basic_58, r"[[:lower:]]+", r"`az{", Some((1, 3))}
-mat!{match_basic_59, r"[[:upper:]]+", r"@AZ[", Some((1, 3))}
-mat!{match_basic_65, r"
-", r"
-", Some((0, 1))}
-mat!{match_basic_66, r"
-", r"
-", Some((0, 1))}
-mat!{match_basic_67, r"[^a]", r"
-", Some((0, 1))}
-mat!{match_basic_68, r"
-a", r"
-a", Some((0, 2))}
-mat!{match_basic_69, r"(a)(b)(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((2, 3))}
-mat!{match_basic_70, r"xxx", r"xxx", Some((0, 3))}
-mat!{match_basic_71, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 6,", Some((0, 6))}
-mat!{match_basic_72, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"2/7", Some((0, 3))}
-mat!{match_basic_73, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 1,Feb 6", Some((5, 11))}
-mat!{match_basic_74, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", r"x", Some((0, 1)), Some((0, 1)), Some((0, 1))}
-mat!{match_basic_75, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", r"xx", Some((0, 2)), Some((1, 2)), Some((1, 2))}
-mat!{match_basic_76, r"a?(ab|ba)*", r"ababababababababababababababababababababababababababababababababababababababababa", Some((0, 81)), Some((79, 81))}
-mat!{match_basic_77, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabbbbaa", Some((18, 25))}
-mat!{match_basic_78, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabaa", Some((18, 22))}
-mat!{match_basic_79, r"aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", r"baaabbbabac", Some((7, 11))}
-mat!{match_basic_80, r".*", r"\ 1\7f", Some((0, 2))}
-mat!{match_basic_81, r"aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", r"XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", Some((53, 57))}
-mat!{match_basic_83, r"a*a*a*a*a*b", r"aaaaaaaaab", Some((0, 10))}
-mat!{match_basic_84, r"^", r"", Some((0, 0))}
-mat!{match_basic_85, r"$", r"", Some((0, 0))}
-mat!{match_basic_86, r"^$", r"", Some((0, 0))}
-mat!{match_basic_87, r"^a$", r"a", Some((0, 1))}
-mat!{match_basic_88, r"abc", r"abc", Some((0, 3))}
-mat!{match_basic_89, r"abc", r"xabcy", Some((1, 4))}
-mat!{match_basic_90, r"abc", r"ababc", Some((2, 5))}
-mat!{match_basic_91, r"ab*c", r"abc", Some((0, 3))}
-mat!{match_basic_92, r"ab*bc", r"abc", Some((0, 3))}
-mat!{match_basic_93, r"ab*bc", r"abbc", Some((0, 4))}
-mat!{match_basic_94, r"ab*bc", r"abbbbc", Some((0, 6))}
-mat!{match_basic_95, r"ab+bc", r"abbc", Some((0, 4))}
-mat!{match_basic_96, r"ab+bc", r"abbbbc", Some((0, 6))}
-mat!{match_basic_97, r"ab?bc", r"abbc", Some((0, 4))}
-mat!{match_basic_98, r"ab?bc", r"abc", Some((0, 3))}
-mat!{match_basic_99, r"ab?c", r"abc", Some((0, 3))}
-mat!{match_basic_100, r"^abc$", r"abc", Some((0, 3))}
-mat!{match_basic_101, r"^abc", r"abcc", Some((0, 3))}
-mat!{match_basic_102, r"abc$", r"aabc", Some((1, 4))}
-mat!{match_basic_103, r"^", r"abc", Some((0, 0))}
-mat!{match_basic_104, r"$", r"abc", Some((3, 3))}
-mat!{match_basic_105, r"a.c", r"abc", Some((0, 3))}
-mat!{match_basic_106, r"a.c", r"axc", Some((0, 3))}
-mat!{match_basic_107, r"a.*c", r"axyzc", Some((0, 5))}
-mat!{match_basic_108, r"a[bc]d", r"abd", Some((0, 3))}
-mat!{match_basic_109, r"a[b-d]e", r"ace", Some((0, 3))}
-mat!{match_basic_110, r"a[b-d]", r"aac", Some((1, 3))}
-mat!{match_basic_111, r"a[-b]", r"a-", Some((0, 2))}
-mat!{match_basic_112, r"a[b-]", r"a-", Some((0, 2))}
-mat!{match_basic_113, r"a]", r"a]", Some((0, 2))}
-mat!{match_basic_114, r"a[]]b", r"a]b", Some((0, 3))}
-mat!{match_basic_115, r"a[^bc]d", r"aed", Some((0, 3))}
-mat!{match_basic_116, r"a[^-b]c", r"adc", Some((0, 3))}
-mat!{match_basic_117, r"a[^]b]c", r"adc", Some((0, 3))}
-mat!{match_basic_118, r"ab|cd", r"abc", Some((0, 2))}
-mat!{match_basic_119, r"ab|cd", r"abcd", Some((0, 2))}
-mat!{match_basic_120, r"a\(b", r"a(b", Some((0, 3))}
-mat!{match_basic_121, r"a\(*b", r"ab", Some((0, 2))}
-mat!{match_basic_122, r"a\(*b", r"a((b", Some((0, 4))}
-mat!{match_basic_123, r"((a))", r"abc", Some((0, 1)), Some((0, 1)), Some((0, 1))}
-mat!{match_basic_124, r"(a)b(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((2, 3))}
-mat!{match_basic_125, r"a+b+c", r"aabbabc", Some((4, 7))}
-mat!{match_basic_126, r"a*", r"aaa", Some((0, 3))}
-mat!{match_basic_128, r"(a*)*", r"-", Some((0, 0)), None}
-mat!{match_basic_129, r"(a*)+", r"-", Some((0, 0)), Some((0, 0))}
-mat!{match_basic_131, r"(a*|b)*", r"-", Some((0, 0)), None}
-mat!{match_basic_132, r"(a+|b)*", r"ab", Some((0, 2)), Some((1, 2))}
-mat!{match_basic_133, r"(a+|b)+", r"ab", Some((0, 2)), Some((1, 2))}
-mat!{match_basic_134, r"(a+|b)?", r"ab", Some((0, 1)), Some((0, 1))}
-mat!{match_basic_135, r"[^ab]*", r"cde", Some((0, 3))}
-mat!{match_basic_137, r"(^)*", r"-", Some((0, 0)), None}
-mat!{match_basic_138, r"a*", r"", Some((0, 0))}
-mat!{match_basic_139, r"([abc])*d", r"abbbcd", Some((0, 6)), Some((4, 5))}
-mat!{match_basic_140, r"([abc])*bcd", r"abcd", Some((0, 4)), Some((0, 1))}
-mat!{match_basic_141, r"a|b|c|d|e", r"e", Some((0, 1))}
-mat!{match_basic_142, r"(a|b|c|d|e)f", r"ef", Some((0, 2)), Some((0, 1))}
-mat!{match_basic_144, r"((a*|b))*", r"-", Some((0, 0)), None, None}
-mat!{match_basic_145, r"abcd*efg", r"abcdefg", Some((0, 7))}
-mat!{match_basic_146, r"ab*", r"xabyabbbz", Some((1, 3))}
-mat!{match_basic_147, r"ab*", r"xayabbbz", Some((1, 2))}
-mat!{match_basic_148, r"(ab|cd)e", r"abcde", Some((2, 5)), Some((2, 4))}
-mat!{match_basic_149, r"[abhgefdc]ij", r"hij", Some((0, 3))}
-mat!{match_basic_150, r"(a|b)c*d", r"abcd", Some((1, 4)), Some((1, 2))}
-mat!{match_basic_151, r"(ab|ab*)bc", r"abc", Some((0, 3)), Some((0, 1))}
-mat!{match_basic_152, r"a([bc]*)c*", r"abc", Some((0, 3)), Some((1, 3))}
-mat!{match_basic_153, r"a([bc]*)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4))}
-mat!{match_basic_154, r"a([bc]+)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4))}
-mat!{match_basic_155, r"a([bc]*)(c+d)", r"abcd", Some((0, 4)), Some((1, 2)), Some((2, 4))}
-mat!{match_basic_156, r"a[bcd]*dcdcde", r"adcdcde", Some((0, 7))}
-mat!{match_basic_157, r"(ab|a)b*c", r"abc", Some((0, 3)), Some((0, 2))}
-mat!{match_basic_158, r"((a)(b)c)(d)", r"abcd", Some((0, 4)), Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((3, 4))}
-mat!{match_basic_159, r"[A-Za-z_][A-Za-z0-9_]*", r"alpha", Some((0, 5))}
-mat!{match_basic_160, r"^a(bc+|b[eh])g|.h$", r"abh", Some((1, 3))}
-mat!{match_basic_161, r"(bc+d$|ef*g.|h?i(j|k))", r"effgz", Some((0, 5)), Some((0, 5))}
-mat!{match_basic_162, r"(bc+d$|ef*g.|h?i(j|k))", r"ij", Some((0, 2)), Some((0, 2)), Some((1, 2))}
-mat!{match_basic_163, r"(bc+d$|ef*g.|h?i(j|k))", r"reffgz", Some((1, 6)), Some((1, 6))}
-mat!{match_basic_164, r"(((((((((a)))))))))", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1))}
-mat!{match_basic_165, r"multiple words", r"multiple words yeah", Some((0, 14))}
-mat!{match_basic_166, r"(.*)c(.*)", r"abcde", Some((0, 5)), Some((0, 2)), Some((3, 5))}
-mat!{match_basic_167, r"abcd", r"abcd", Some((0, 4))}
-mat!{match_basic_168, r"a(bc)d", r"abcd", Some((0, 4)), Some((1, 3))}
-mat!{match_basic_169, r"a[\ 1-\ 3]?c", r"a\ 2c", Some((0, 3))}
-mat!{match_basic_170, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qaddafi", Some((0, 15)), None, Some((10, 12))}
-mat!{match_basic_171, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mo'ammar Gadhafi", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_172, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Kaddafi", Some((0, 15)), None, Some((10, 12))}
-mat!{match_basic_173, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qadhafi", Some((0, 15)), None, Some((10, 12))}
-mat!{match_basic_174, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gadafi", Some((0, 14)), None, Some((10, 11))}
-mat!{match_basic_175, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadafi", Some((0, 15)), None, Some((11, 12))}
-mat!{match_basic_176, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moamar Gaddafi", Some((0, 14)), None, Some((9, 11))}
-mat!{match_basic_177, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadhdhafi", Some((0, 18)), None, Some((13, 15))}
-mat!{match_basic_178, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Khaddafi", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_179, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafy", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_180, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghadafi", Some((0, 15)), None, Some((11, 12))}
-mat!{match_basic_181, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafi", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_182, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muamar Kaddafi", Some((0, 14)), None, Some((9, 11))}
-mat!{match_basic_183, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Quathafi", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_184, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gheddafi", Some((0, 16)), None, Some((11, 13))}
-mat!{match_basic_185, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Khadafy", Some((0, 15)), None, Some((11, 12))}
-mat!{match_basic_186, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Qudhafi", Some((0, 15)), None, Some((10, 12))}
-mat!{match_basic_187, r"a+(b|c)*d+", r"aabcdd", Some((0, 6)), Some((3, 4))}
-mat!{match_basic_188, r"^.+$", r"vivi", Some((0, 4))}
-mat!{match_basic_189, r"^(.+)$", r"vivi", Some((0, 4)), Some((0, 4))}
-mat!{match_basic_190, r"^([^!.]+).att.com!(.+)$", r"gryphon.att.com!eby", Some((0, 19)), Some((0, 7)), Some((16, 19))}
-mat!{match_basic_191, r"^([^!]+!)?([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3))}
-mat!{match_basic_192, r"^([^!]+!)?([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_193, r"^([^!]+!)?([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_194, r"^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), Some((4, 8)), Some((8, 11))}
-mat!{match_basic_195, r"((foo)|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), None, Some((0, 3))}
-mat!{match_basic_196, r"((foo)|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), None, Some((4, 7))}
-mat!{match_basic_197, r"((foo)|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))}
-mat!{match_basic_198, r"((foo)|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3))}
-mat!{match_basic_199, r"((foo)|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7))}
-mat!{match_basic_200, r"((foo)|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))}
-mat!{match_basic_201, r"(foo|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))}
-mat!{match_basic_202, r"(foo|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), Some((4, 7))}
-mat!{match_basic_203, r"(foo|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3))}
-mat!{match_basic_204, r"(foo|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3))}
-mat!{match_basic_205, r"(foo|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7))}
-mat!{match_basic_206, r"(foo|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3))}
-mat!{match_basic_207, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))}
-mat!{match_basic_208, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3))}
-mat!{match_basic_209, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_210, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))}
-mat!{match_basic_211, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_212, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bas", Some((0, 3)), Some((0, 3)), None, Some((0, 3))}
-mat!{match_basic_213, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bar!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_214, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))}
-mat!{match_basic_215, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7))}
-mat!{match_basic_216, r".*(/XXX).*", r"/XXX", Some((0, 4)), Some((0, 4))}
-mat!{match_basic_217, r".*(\\XXX).*", r"\XXX", Some((0, 4)), Some((0, 4))}
-mat!{match_basic_218, r"\\XXX", r"\XXX", Some((0, 4))}
-mat!{match_basic_219, r".*(/000).*", r"/000", Some((0, 4)), Some((0, 4))}
-mat!{match_basic_220, r".*(\\000).*", r"\000", Some((0, 4)), Some((0, 4))}
-mat!{match_basic_221, r"\\000", r"\000", Some((0, 4))}
-
-// Tests from nullsubexpr.dat
-mat!{match_nullsubexpr_3, r"(a*)*", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_5, r"(a*)*", r"x", Some((0, 0)), None}
-mat!{match_nullsubexpr_6, r"(a*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_7, r"(a*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_8, r"(a*)+", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_9, r"(a*)+", r"x", Some((0, 0)), Some((0, 0))}
-mat!{match_nullsubexpr_10, r"(a*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_11, r"(a*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_12, r"(a+)*", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_13, r"(a+)*", r"x", Some((0, 0))}
-mat!{match_nullsubexpr_14, r"(a+)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_15, r"(a+)*", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_16, r"(a+)+", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_17, r"(a+)+", r"x", None}
-mat!{match_nullsubexpr_18, r"(a+)+", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_19, r"(a+)+", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_21, r"([a]*)*", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_23, r"([a]*)*", r"x", Some((0, 0)), None}
-mat!{match_nullsubexpr_24, r"([a]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_25, r"([a]*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_26, r"([a]*)+", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_27, r"([a]*)+", r"x", Some((0, 0)), Some((0, 0))}
-mat!{match_nullsubexpr_28, r"([a]*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_29, r"([a]*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_30, r"([^b]*)*", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_32, r"([^b]*)*", r"b", Some((0, 0)), None}
-mat!{match_nullsubexpr_33, r"([^b]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_34, r"([^b]*)*", r"aaaaaab", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_35, r"([ab]*)*", r"a", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_36, r"([ab]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_37, r"([ab]*)*", r"ababab", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_38, r"([ab]*)*", r"bababa", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_39, r"([ab]*)*", r"b", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_40, r"([ab]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_41, r"([ab]*)*", r"aaaabcde", Some((0, 5)), Some((0, 5))}
-mat!{match_nullsubexpr_42, r"([^a]*)*", r"b", Some((0, 1)), Some((0, 1))}
-mat!{match_nullsubexpr_43, r"([^a]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_45, r"([^a]*)*", r"aaaaaa", Some((0, 0)), None}
-mat!{match_nullsubexpr_46, r"([^ab]*)*", r"ccccxx", Some((0, 6)), Some((0, 6))}
-mat!{match_nullsubexpr_48, r"([^ab]*)*", r"ababab", Some((0, 0)), None}
-mat!{match_nullsubexpr_50, r"((z)+|a)*", r"zabcde", Some((0, 2)), Some((1, 2))}
-mat!{match_nullsubexpr_69, r"(a*)*(x)", r"x", Some((0, 1)), None, Some((0, 1))}
-mat!{match_nullsubexpr_70, r"(a*)*(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2))}
-mat!{match_nullsubexpr_71, r"(a*)*(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2))}
-mat!{match_nullsubexpr_73, r"(a*)+(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1))}
-mat!{match_nullsubexpr_74, r"(a*)+(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2))}
-mat!{match_nullsubexpr_75, r"(a*)+(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2))}
-mat!{match_nullsubexpr_77, r"(a*){2}(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1))}
-mat!{match_nullsubexpr_78, r"(a*){2}(x)", r"ax", Some((0, 2)), Some((1, 1)), Some((1, 2))}
-mat!{match_nullsubexpr_79, r"(a*){2}(x)", r"axa", Some((0, 2)), Some((1, 1)), Some((1, 2))}
-
-// Tests from repetition.dat
-mat!{match_repetition_10, r"((..)|(.))", r"", None}
-mat!{match_repetition_11, r"((..)|(.))((..)|(.))", r"", None}
-mat!{match_repetition_12, r"((..)|(.))((..)|(.))((..)|(.))", r"", None}
-mat!{match_repetition_14, r"((..)|(.)){1}", r"", None}
-mat!{match_repetition_15, r"((..)|(.)){2}", r"", None}
-mat!{match_repetition_16, r"((..)|(.)){3}", r"", None}
-mat!{match_repetition_18, r"((..)|(.))*", r"", Some((0, 0))}
-mat!{match_repetition_20, r"((..)|(.))", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))}
-mat!{match_repetition_21, r"((..)|(.))((..)|(.))", r"a", None}
-mat!{match_repetition_22, r"((..)|(.))((..)|(.))((..)|(.))", r"a", None}
-mat!{match_repetition_24, r"((..)|(.)){1}", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))}
-mat!{match_repetition_25, r"((..)|(.)){2}", r"a", None}
-mat!{match_repetition_26, r"((..)|(.)){3}", r"a", None}
-mat!{match_repetition_28, r"((..)|(.))*", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))}
-mat!{match_repetition_30, r"((..)|(.))", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_31, r"((..)|(.))((..)|(.))", r"aa", Some((0, 2)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2))}
-mat!{match_repetition_32, r"((..)|(.))((..)|(.))((..)|(.))", r"aa", None}
-mat!{match_repetition_34, r"((..)|(.)){1}", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_35, r"((..)|(.)){2}", r"aa", Some((0, 2)), Some((1, 2)), None, Some((1, 2))}
-mat!{match_repetition_36, r"((..)|(.)){3}", r"aa", None}
-mat!{match_repetition_38, r"((..)|(.))*", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_40, r"((..)|(.))", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_41, r"((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3))}
-mat!{match_repetition_42, r"((..)|(.))((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2)), Some((2, 3)), None, Some((2, 3))}
-mat!{match_repetition_44, r"((..)|(.)){1}", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_46, r"((..)|(.)){2}", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3))}
-mat!{match_repetition_47, r"((..)|(.)){3}", r"aaa", Some((0, 3)), Some((2, 3)), None, Some((2, 3))}
-mat!{match_repetition_50, r"((..)|(.))*", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3))}
-mat!{match_repetition_52, r"((..)|(.))", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_53, r"((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_54, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3)), Some((3, 4)), None, Some((3, 4))}
-mat!{match_repetition_56, r"((..)|(.)){1}", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_57, r"((..)|(.)){2}", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_59, r"((..)|(.)){3}", r"aaaa", Some((0, 4)), Some((3, 4)), Some((0, 2)), Some((3, 4))}
-mat!{match_repetition_61, r"((..)|(.))*", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_63, r"((..)|(.))", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_64, r"((..)|(.))((..)|(.))", r"aaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_65, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaa", Some((0, 5)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 5)), None, Some((4, 5))}
-mat!{match_repetition_67, r"((..)|(.)){1}", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_68, r"((..)|(.)){2}", r"aaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_70, r"((..)|(.)){3}", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5))}
-mat!{match_repetition_73, r"((..)|(.))*", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5))}
-mat!{match_repetition_75, r"((..)|(.))", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_76, r"((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_77, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 6)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 6)), Some((4, 6)), None}
-mat!{match_repetition_79, r"((..)|(.)){1}", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
-mat!{match_repetition_80, r"((..)|(.)){2}", r"aaaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
-mat!{match_repetition_81, r"((..)|(.)){3}", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None}
-mat!{match_repetition_83, r"((..)|(.))*", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None}
-mat!{match_repetition_90, r"X(.?){0,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_91, r"X(.?){1,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_92, r"X(.?){2,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_93, r"X(.?){3,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_94, r"X(.?){4,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_95, r"X(.?){5,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_96, r"X(.?){6,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_97, r"X(.?){7,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
-mat!{match_repetition_98, r"X(.?){8,}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_100, r"X(.?){0,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_102, r"X(.?){1,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_104, r"X(.?){2,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_106, r"X(.?){3,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_108, r"X(.?){4,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_110, r"X(.?){5,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_112, r"X(.?){6,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_114, r"X(.?){7,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_115, r"X(.?){8,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
-mat!{match_repetition_126, r"(a|ab|c|bcd){0,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_127, r"(a|ab|c|bcd){1,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_128, r"(a|ab|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
-mat!{match_repetition_129, r"(a|ab|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
-mat!{match_repetition_130, r"(a|ab|c|bcd){4,}(d*)", r"ababcd", None}
-mat!{match_repetition_131, r"(a|ab|c|bcd){0,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_132, r"(a|ab|c|bcd){1,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_133, r"(a|ab|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
-mat!{match_repetition_134, r"(a|ab|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
-mat!{match_repetition_135, r"(a|ab|c|bcd){4,10}(d*)", r"ababcd", None}
-mat!{match_repetition_136, r"(a|ab|c|bcd)*(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_137, r"(a|ab|c|bcd)+(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
-mat!{match_repetition_143, r"(ab|a|c|bcd){0,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_145, r"(ab|a|c|bcd){1,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_147, r"(ab|a|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_149, r"(ab|a|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_150, r"(ab|a|c|bcd){4,}(d*)", r"ababcd", None}
-mat!{match_repetition_152, r"(ab|a|c|bcd){0,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_154, r"(ab|a|c|bcd){1,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_156, r"(ab|a|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_158, r"(ab|a|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_159, r"(ab|a|c|bcd){4,10}(d*)", r"ababcd", None}
-mat!{match_repetition_161, r"(ab|a|c|bcd)*(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-mat!{match_repetition_163, r"(ab|a|c|bcd)+(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
-
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-macro_rules! regex {
- ($re:expr) => (
- match ::regex::Regex::new($re) {
- Ok(re) => re,
- Err(err) => panic!("{:?}", err),
- }
- );
-}
-
-#[path = "bench.rs"]
-mod dynamic_bench;
-#[path = "tests.rs"]
-mod dynamic_tests;
-
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-use regex::Regex;
-static RE: Regex = regex!(r"\d+");
-
-#[test]
-fn static_splitn() {
- let text = "cauchy123plato456tyler789binx";
- let subs: Vec<&str> = RE.splitn(text, 2).collect();
- assert_eq!(subs, vec!("cauchy", "plato456tyler789binx"));
-}
-
-#[test]
-fn static_split() {
- let text = "cauchy123plato456tyler789binx";
- let subs: Vec<&str> = RE.split(text).collect();
- assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx"));
-}
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// ignore-tidy-linelength
-// ignore-lexer-test FIXME #15679
-
-use regex::{Regex, NoExpand};
-
-#[test]
-fn splitn() {
- let re = regex!(r"\d+");
- let text = "cauchy123plato456tyler789binx";
- let subs: Vec<&str> = re.splitn(text, 2).collect();
- assert_eq!(subs, vec!("cauchy", "plato456tyler789binx"));
-}
-
-#[test]
-fn split() {
- let re = regex!(r"\d+");
- let text = "cauchy123plato456tyler789binx";
- let subs: Vec<&str> = re.split(text).collect();
- assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx"));
-}
-
-#[test]
-fn empty_regex_empty_match() {
- let re = regex!("");
- let ms = re.find_iter("").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 0)]);
-}
-
-#[test]
-fn empty_regex_nonempty_match() {
- let re = regex!("");
- let ms = re.find_iter("abc").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
-}
-
-#[test]
-fn quoted_bracket_set() {
- let re = regex!(r"([\x{5b}\x{5d}])");
- let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 1), (1, 2)]);
- let re = regex!(r"([\[\]])");
- let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 1), (1, 2)]);
-}
-
-#[test]
-fn first_range_starts_with_left_bracket() {
- let re = regex!(r"([[-z])");
- let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 1), (1, 2)]);
-}
-
-#[test]
-fn range_ends_with_escape() {
- let re = regex!(r"([\[-\x{5d}])");
- let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
- assert_eq!(ms, vec![(0, 1), (1, 2)]);
-}
-
-macro_rules! replace {
- ($name:ident, $which:ident, $re:expr,
- $search:expr, $replace:expr, $result:expr) => (
- #[test]
- fn $name() {
- let re = regex!($re);
- assert_eq!(re.$which($search, $replace), String::from_str($result));
- }
- );
-}
-
-replace!{rep_first, replace, r"\d", "age: 26", "Z", "age: Z6"}
-replace!{rep_plus, replace, r"\d+", "age: 26", "Z", "age: Z"}
-replace!{rep_all, replace_all, r"\d", "age: 26", "Z", "age: ZZ"}
-replace!{rep_groups, replace, r"(\S+)\s+(\S+)", "w1 w2", "$2 $1", "w2 w1"}
-replace!{rep_double_dollar, replace,
- r"(\S+)\s+(\S+)", "w1 w2", "$2 $$1", "w2 $1"}
-replace!{rep_no_expand, replace,
- r"(\S+)\s+(\S+)", "w1 w2", NoExpand("$2 $1"), "$2 $1"}
-replace!{rep_named, replace_all,
- r"(?P<first>\S+)\s+(?P<last>\S+)(?P<space>\s*)",
- "w1 w2 w3 w4", "$last $first$space", "w2 w1 w4 w3"}
-replace!{rep_trim, replace_all, "^[ \t]+|[ \t]+$", " \t trim me\t \t",
- "", "trim me"}
-
-macro_rules! noparse {
- ($name:ident, $re:expr) => (
- #[test]
- fn $name() {
- let re = $re;
- match Regex::new(re) {
- Err(_) => {},
- Ok(_) => panic!("Regex '{}' should cause a parse error.", re),
- }
- }
- );
-}
-
-noparse!{fail_double_repeat, "a**"}
-noparse!{fail_no_repeat_arg, "*"}
-noparse!{fail_no_repeat_arg_begin, "^*"}
-noparse!{fail_incomplete_escape, "\\"}
-noparse!{fail_class_incomplete, "[A-"}
-noparse!{fail_class_not_closed, "[A"}
-noparse!{fail_class_no_begin, r"[\A]"}
-noparse!{fail_class_no_end, r"[\z]"}
-noparse!{fail_class_no_boundary, r"[\b]"}
-noparse!{fail_open_paren, "("}
-noparse!{fail_close_paren, ")"}
-noparse!{fail_invalid_range, "[a-Z]"}
-noparse!{fail_empty_capture_name, "(?P<>a)"}
-noparse!{fail_empty_capture_exp, "(?P<name>)"}
-noparse!{fail_bad_capture_name, "(?P<na-me>)"}
-noparse!{fail_bad_flag, "(?a)a"}
-noparse!{fail_empty_alt_before, "|a"}
-noparse!{fail_empty_alt_after, "a|"}
-noparse!{fail_counted_big_exact, "a{1001}"}
-noparse!{fail_counted_big_min, "a{1001,}"}
-noparse!{fail_counted_no_close, "a{1001"}
-noparse!{fail_unfinished_cap, "(?"}
-noparse!{fail_unfinished_escape, "\\"}
-noparse!{fail_octal_digit, r"\8"}
-noparse!{fail_hex_digit, r"\xG0"}
-noparse!{fail_hex_short, r"\xF"}
-noparse!{fail_hex_long_digits, r"\x{fffg}"}
-noparse!{fail_flag_bad, "(?a)"}
-noparse!{fail_flag_empty, "(?)"}
-noparse!{fail_double_neg, "(?-i-i)"}
-noparse!{fail_neg_empty, "(?i-)"}
-noparse!{fail_empty_group, "()"}
-noparse!{fail_dupe_named, "(?P<a>.)(?P<a>.)"}
-noparse!{fail_range_end_no_class, "[a-[:lower:]]"}
-noparse!{fail_range_end_no_begin, r"[a-\A]"}
-noparse!{fail_range_end_no_end, r"[a-\z]"}
-noparse!{fail_range_end_no_boundary, r"[a-\b]"}
-noparse!{fail_repeat_no_expr, r"-|+"}
-
-macro_rules! mat {
- ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
- #[test]
- fn $name() {
- let text = $text;
- let expected: Vec<Option<(uint, uint)>> = vec!($($loc)+);
- let r = regex!($re);
- let got = match r.captures(text) {
- Some(c) => c.iter_pos().collect::<Vec<Option<(uint, uint)>>>(),
- None => vec!(None),
- };
- // The test set sometimes leave out capture groups, so truncate
- // actual capture groups to match test set.
- let mut sgot = got.as_slice();
- if sgot.len() > expected.len() {
- sgot = &sgot[..expected.len()]
- }
- if expected != sgot {
- panic!("For RE '{}' against '{}', expected '{:?}' but got '{:?}'",
- $re, text, expected, sgot);
- }
- }
- );
-}
-
-// Some crazy expressions from regular-expressions.info.
-mat!{match_ranges,
- r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
- "num: 255", Some((5, 8))}
-mat!{match_ranges_not,
- r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
- "num: 256", None}
-mat!{match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))}
-mat!{match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))}
-mat!{match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))}
-mat!{match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None}
-mat!{match_email, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
- "mine is jam.slam@gmail.com ", Some((8, 26))}
-mat!{match_email_not, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
- "mine is jam.slam@gmail ", None}
-mat!{match_email_big, r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
- "mine is jam.slam@gmail.com ", Some((8, 26))}
-mat!{match_date1,
- r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
- "1900-01-01", Some((0, 10))}
-mat!{match_date2,
- r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
- "1900-00-01", None}
-mat!{match_date3,
- r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
- "1900-13-01", None}
-
-// Exercise the flags.
-mat!{match_flag_case, "(?i)abc", "ABC", Some((0, 3))}
-mat!{match_flag_weird_case, "(?i)a(?-i)bc", "Abc", Some((0, 3))}
-mat!{match_flag_weird_case_not, "(?i)a(?-i)bc", "ABC", None}
-mat!{match_flag_case_dotnl, "(?is)a.", "A\n", Some((0, 2))}
-mat!{match_flag_case_dotnl_toggle, "(?is)a.(?-is)a.", "A\nab", Some((0, 4))}
-mat!{match_flag_case_dotnl_toggle_not, "(?is)a.(?-is)a.", "A\na\n", None}
-mat!{match_flag_case_dotnl_toggle_ok, "(?is)a.(?-is:a.)?", "A\na\n", Some((0, 2))}
-mat!{match_flag_multi, "(?m)(?:^\\d+$\n?)+", "123\n456\n789", Some((0, 11))}
-mat!{match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1))}
-mat!{match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2))}
-mat!{match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2))}
-
-// Some Unicode tests.
-// A couple of these are commented out because something in the guts of macro expansion is creating
-// invalid byte strings.
-//mat!{uni_literal, r"â… ", "â… ", Some((0, 3))}
-mat!{uni_one, r"\pN", "â… ", Some((0, 3))}
-mat!{uni_mixed, r"\pN+", "â… 1â…¡2", Some((0, 8))}
-mat!{uni_not, r"\PN+", "abâ… ", Some((0, 2))}
-mat!{uni_not_class, r"[\PN]+", "abâ… ", Some((0, 2))}
-mat!{uni_not_class_neg, r"[^\PN]+", "abâ… ", Some((2, 5))}
-mat!{uni_case, r"(?i)Δ", "δ", Some((0, 2))}
-//mat!{uni_case_not, r"Δ", "δ", None}
-mat!{uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8))}
-mat!{uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10))}
-mat!{uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10))}
-mat!{uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10))}
-
-// Test the Unicode friendliness of Perl character classes.
-mat!{uni_perl_w, r"\w+", "dδd", Some((0, 4))}
-mat!{uni_perl_w_not, r"\w+", "⥡", None}
-mat!{uni_perl_w_neg, r"\W+", "⥡", Some((0, 3))}
-mat!{uni_perl_d, r"\d+", "1२३9", Some((0, 8))}
-mat!{uni_perl_d_not, r"\d+", "â…¡", None}
-mat!{uni_perl_d_neg, r"\D+", "â…¡", Some((0, 3))}
-mat!{uni_perl_s, r"\s+", " ", Some((0, 3))}
-mat!{uni_perl_s_not, r"\s+", "☃", None}
-mat!{uni_perl_s_neg, r"\S+", "☃", Some((0, 3))}
-
-// And do the same for word boundaries.
-mat!{uni_boundary_none, r"\d\b", "6δ", None}
-mat!{uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1))}
-
-// A whole mess of tests from Glenn Fowler's regex test suite.
-// Generated by the 'src/etc/regex-match-tests' program.
-mod matches;
+++ /dev/null
-The following license covers testregex.c and all associated test data.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
-without restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, and/or sell copies of the
-Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following disclaimer:
-
-THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+++ /dev/null
-Test data was taken from the Go distribution, which was in turn taken from the
-testregex test suite:
-
- http://www2.research.att.com/~astopen/testregex/testregex.html
-
-The LICENSE in this directory corresponds to the LICENSE that the data was
-released under.
-
-The tests themselves were modified for RE2/Go. A couple were modified further
-by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them.
-(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
-have been a bad idea, but I think being consistent with an established Regex
-library is worth something.
-
-Note that these files are read by 'src/etc/regexp-match-tests' and turned into
-Rust tests found in 'src/libregexp/tests/matches.rs'.
-
+++ /dev/null
-NOTE all standard compliant implementations should pass these : 2002-05-31
-
-BE abracadabra$ abracadabracadabra (7,18)
-BE a...b abababbb (2,7)
-BE XXXXXX ..XXXXXX (2,8)
-E \) () (1,2)
-BE a] a]a (0,2)
-B } } (0,1)
-E \} } (0,1)
-BE \] ] (0,1)
-B ] ] (0,1)
-E ] ] (0,1)
-B { { (0,1)
-B } } (0,1)
-BE ^a ax (0,1)
-BE \^a a^a (1,3)
-BE a\^ a^ (0,2)
-BE a$ aa (1,2)
-BE a\$ a$ (0,2)
-BE ^$ NULL (0,0)
-E $^ NULL (0,0)
-E a($) aa (1,2)(2,2)
-E a*(^a) aa (0,1)(0,1)
-E (..)*(...)* a (0,0)
-E (..)*(...)* abcd (0,4)(2,4)
-E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
-E (ab)c|abc abc (0,3)(0,2)
-E a{0}b ab (1,2)
-E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
-E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
-E a{9876543210} NULL BADBR
-E ((a|a)|a) a (0,1)(0,1)(0,1)
-E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
-E a*(a.|aa) aaaa (0,4)(2,4)
-E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
-E (a|b)?.* b (0,1)(0,1)
-E (a|b)c|a(b|c) ac (0,2)(0,1)
-E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
-E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
-E (a|b)*c|(a|ab)*c xc (1,2)
-E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
-E a?(ab|ba)ab abab (0,4)(0,2)
-E a?(ac{0}b|ba)ab abab (0,4)(0,2)
-E ab|abab abbabab (0,2)
-E aba|bab|bba baaabbbaba (5,8)
-E aba|bab baaabbbaba (6,9)
-E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
-E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
-E ab|a xabc (1,3)
-E ab|a xxabc (2,4)
-Ei (Ab|cD)* aBcD (0,4)(2,4)
-BE [^-] --a (2,3)
-BE [a-]* --a (0,3)
-BE [a-m-]* --amoma-- (0,4)
-E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
-E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
-{E [[:upper:]] A (0,1) [[<element>]] not supported
-E [[:lower:]]+ `az{ (1,3)
-E [[:upper:]]+ @AZ[ (1,3)
-# No collation in Go
-#BE [[-]] [[-]] (2,4)
-#BE [[.NIL.]] NULL ECOLLATE
-#BE [[=aleph=]] NULL ECOLLATE
-}
-BE$ \n \n (0,1)
-BEn$ \n \n (0,1)
-BE$ [^a] \n (0,1)
-BE$ \na \na (0,2)
-E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
-BE xxx xxx (0,3)
-E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
-E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
-E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
-E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
-E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
-E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
-E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
-E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
-E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
-BE$ .* \x01\x7f (0,2)
-E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
-L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
-E a*a*a*a*a*b aaaaaaaaab (0,10)
-BE ^ NULL (0,0)
-BE $ NULL (0,0)
-BE ^$ NULL (0,0)
-BE ^a$ a (0,1)
-BE abc abc (0,3)
-BE abc xabcy (1,4)
-BE abc ababc (2,5)
-BE ab*c abc (0,3)
-BE ab*bc abc (0,3)
-BE ab*bc abbc (0,4)
-BE ab*bc abbbbc (0,6)
-E ab+bc abbc (0,4)
-E ab+bc abbbbc (0,6)
-E ab?bc abbc (0,4)
-E ab?bc abc (0,3)
-E ab?c abc (0,3)
-BE ^abc$ abc (0,3)
-BE ^abc abcc (0,3)
-BE abc$ aabc (1,4)
-BE ^ abc (0,0)
-BE $ abc (3,3)
-BE a.c abc (0,3)
-BE a.c axc (0,3)
-BE a.*c axyzc (0,5)
-BE a[bc]d abd (0,3)
-BE a[b-d]e ace (0,3)
-BE a[b-d] aac (1,3)
-BE a[-b] a- (0,2)
-BE a[b-] a- (0,2)
-BE a] a] (0,2)
-BE a[]]b a]b (0,3)
-BE a[^bc]d aed (0,3)
-BE a[^-b]c adc (0,3)
-BE a[^]b]c adc (0,3)
-E ab|cd abc (0,2)
-E ab|cd abcd (0,2)
-E a\(b a(b (0,3)
-E a\(*b ab (0,2)
-E a\(*b a((b (0,4)
-E ((a)) abc (0,1)(0,1)(0,1)
-E (a)b(c) abc (0,3)(0,1)(2,3)
-E a+b+c aabbabc (4,7)
-E a* aaa (0,3)
-#E (a*)* - (0,0)(0,0)
-E (a*)* - (0,0)(?,?) RE2/Go
-E (a*)+ - (0,0)(0,0)
-#E (a*|b)* - (0,0)(0,0)
-E (a*|b)* - (0,0)(?,?) RE2/Go
-E (a+|b)* ab (0,2)(1,2)
-E (a+|b)+ ab (0,2)(1,2)
-E (a+|b)? ab (0,1)(0,1)
-BE [^ab]* cde (0,3)
-#E (^)* - (0,0)(0,0)
-E (^)* - (0,0)(?,?) RE2/Go
-BE a* NULL (0,0)
-E ([abc])*d abbbcd (0,6)(4,5)
-E ([abc])*bcd abcd (0,4)(0,1)
-E a|b|c|d|e e (0,1)
-E (a|b|c|d|e)f ef (0,2)(0,1)
-#E ((a*|b))* - (0,0)(0,0)(0,0)
-E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go
-BE abcd*efg abcdefg (0,7)
-BE ab* xabyabbbz (1,3)
-BE ab* xayabbbz (1,2)
-E (ab|cd)e abcde (2,5)(2,4)
-BE [abhgefdc]ij hij (0,3)
-E (a|b)c*d abcd (1,4)(1,2)
-E (ab|ab*)bc abc (0,3)(0,1)
-E a([bc]*)c* abc (0,3)(1,3)
-E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
-E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
-E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
-E a[bcd]*dcdcde adcdcde (0,7)
-E (ab|a)b*c abc (0,3)(0,2)
-E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
-BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
-E ^a(bc+|b[eh])g|.h$ abh (1,3)
-E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
-E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
-E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
-E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
-BE multiple words multiple words yeah (0,14)
-E (.*)c(.*) abcde (0,5)(0,2)(3,5)
-BE abcd abcd (0,4)
-E a(bc)d abcd (0,4)(1,3)
-E a[\ 1-\ 3]?c a\ 2c (0,3)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
-E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
-E a+(b|c)*d+ aabcdd (0,6)(3,4)
-E ^.+$ vivi (0,4)
-E ^(.+)$ vivi (0,4)(0,4)
-E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
-E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
-E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
-E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
-E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
-E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
-E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
-E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
-E ((foo)|bar)!bas bar!bas (0,7)(0,3)
-E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
-E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
-E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
-E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
-E (foo|(bar))!bas foo!bas (0,7)(0,3)
-E (foo|bar)!bas bar!bas (0,7)(0,3)
-E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
-E (foo|bar)!bas foo!bas (0,7)(0,3)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
-E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
-E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
-E .*(/XXX).* /XXX (0,4)(0,4)
-E .*(\\XXX).* \XXX (0,4)(0,4)
-E \\XXX \XXX (0,4)
-E .*(/000).* /000 (0,4)(0,4)
-E .*(\\000).* \000 (0,4)(0,4)
-E \\000 \000 (0,4)
+++ /dev/null
-NOTE null subexpression matches : 2002-06-06
-
-E (a*)* a (0,1)(0,1)
-#E SAME x (0,0)(0,0)
-E SAME x (0,0)(?,?) RE2/Go
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E (a*)+ a (0,1)(0,1)
-E SAME x (0,0)(0,0)
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E (a+)* a (0,1)(0,1)
-E SAME x (0,0)
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E (a+)+ a (0,1)(0,1)
-E SAME x NOMATCH
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-
-E ([a]*)* a (0,1)(0,1)
-#E SAME x (0,0)(0,0)
-E SAME x (0,0)(?,?) RE2/Go
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E ([a]*)+ a (0,1)(0,1)
-E SAME x (0,0)(0,0)
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaax (0,6)(0,6)
-E ([^b]*)* a (0,1)(0,1)
-#E SAME b (0,0)(0,0)
-E SAME b (0,0)(?,?) RE2/Go
-E SAME aaaaaa (0,6)(0,6)
-E SAME aaaaaab (0,6)(0,6)
-E ([ab]*)* a (0,1)(0,1)
-E SAME aaaaaa (0,6)(0,6)
-E SAME ababab (0,6)(0,6)
-E SAME bababa (0,6)(0,6)
-E SAME b (0,1)(0,1)
-E SAME bbbbbb (0,6)(0,6)
-E SAME aaaabcde (0,5)(0,5)
-E ([^a]*)* b (0,1)(0,1)
-E SAME bbbbbb (0,6)(0,6)
-#E SAME aaaaaa (0,0)(0,0)
-E SAME aaaaaa (0,0)(?,?) RE2/Go
-E ([^ab]*)* ccccxx (0,6)(0,6)
-#E SAME ababab (0,0)(0,0)
-E SAME ababab (0,0)(?,?) RE2/Go
-
-E ((z)+|a)* zabcde (0,2)(1,2)
-
-#{E a+? aaaaaa (0,1) no *? +? mimimal match ops
-#E (a) aaa (0,1)(0,1)
-#E (a*?) aaa (0,0)(0,0)
-#E (a)*? aaa (0,0)
-#E (a*?)*? aaa (0,0)
-#}
-
-B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
-B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
-B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
-B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
-B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
-B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
-B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
-B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
-
-#E (a*)*(x) x (0,1)(0,0)(0,1)
-E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go
-E (a*)*(x) ax (0,2)(0,1)(1,2)
-E (a*)*(x) axa (0,2)(0,1)(1,2)
-
-E (a*)+(x) x (0,1)(0,0)(0,1)
-E (a*)+(x) ax (0,2)(0,1)(1,2)
-E (a*)+(x) axa (0,2)(0,1)(1,2)
-
-E (a*){2}(x) x (0,1)(0,0)(0,1)
-E (a*){2}(x) ax (0,2)(1,1)(1,2)
-E (a*){2}(x) axa (0,2)(1,1)(1,2)
+++ /dev/null
-NOTE implicit vs. explicit repetitions : 2009-02-02
-
-# Glenn Fowler <gsf@research.att.com>
-# conforming matches (column 4) must match one of the following BREs
-# NOMATCH
-# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
-# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
-# i.e., each 3-tuple has two identical elements and one (?,?)
-
-E ((..)|(.)) NULL NOMATCH
-E ((..)|(.))((..)|(.)) NULL NOMATCH
-E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
-
-E ((..)|(.)){1} NULL NOMATCH
-E ((..)|(.)){2} NULL NOMATCH
-E ((..)|(.)){3} NULL NOMATCH
-
-E ((..)|(.))* NULL (0,0)
-
-E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
-E ((..)|(.))((..)|(.)) a NOMATCH
-E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
-
-E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
-E ((..)|(.)){2} a NOMATCH
-E ((..)|(.)){3} a NOMATCH
-
-E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
-
-E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
-E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
-
-E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
-E ((..)|(.)){3} aa NOMATCH
-
-E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
-
-E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
-E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
-
-E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
-#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
-E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
-E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
-
-#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
-E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
-
-E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
-E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
-
-E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
-#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
-E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
-
-E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
-
-E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
-E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
-
-E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
-#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
-E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
-
-#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
-E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
-
-E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
-E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
-
-E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
-E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
-E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
-
-E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)
-
-NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
-
-# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
-# Linux/GLIBC gets the {8,} and {8,8} wrong.
-
-:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
-:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
-:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
-:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
-:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
-:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
-:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
-:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
-:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
-#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
-:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
-:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
-:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
-:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
-:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
-:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
-:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
-#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
-:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
-:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
-
-# These test a fixed bug in my regex-tdfa that did not keep the expanded
-# form properly grouped, so right association did the wrong thing with
-# these ambiguous patterns (crafted just to test my code when I became
-# suspicious of my implementation). The first subexpression should use
-# "ab" then "a" then "bcd".
-
-# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
-# results like (0,6)(4,5)(6,6).
-
-:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1)
-:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1)
-:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
-:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1)
-:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1)
-:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
-:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1)
-:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1)
-
-# The above worked on Linux/GLIBC but the following often fail.
-# They also trip up OS X / FreeBSD / NetBSD:
-
-#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
-#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
-:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
-#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
-:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
-#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
-:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
+++ /dev/null
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-// FIXME: Currently, the VM simulates an NFA. It would be nice to have another
-// VM that simulates a DFA.
-//
-// According to Russ Cox[1], a DFA performs better than an NFA, principally
-// because it reuses states previously computed by the machine *and* doesn't
-// keep track of capture groups. The drawback of a DFA (aside from its
-// complexity) is that it can't accurately return the locations of submatches.
-// The NFA *can* do that. (This is my understanding anyway.)
-//
-// Cox suggests that a DFA ought to be used to answer "does this match" and
-// "where does it match" questions. (In the latter, the starting position of
-// the match is computed by executing the regex backwards.) Cox also suggests
-// that a DFA should be run when asking "where are the submatches", which can
-// 1) quickly answer "no" is there's no match and 2) discover the substring
-// that matches, which means running the NFA on smaller input.
-//
-// Currently, the NFA simulation implemented below does some dirty tricks to
-// avoid tracking capture groups when they aren't needed (which only works
-// for 'is_match', not 'find'). This is a half-measure, but does provide some
-// perf improvement.
-//
-// AFAIK, the DFA/NFA approach is implemented in RE2/C++ but *not* in RE2/Go.
-//
-// [1] - http://swtch.com/~rsc/regex/regex3.html
-
-pub use self::MatchKind::*;
-pub use self::StepState::*;
-
-use std::cmp;
-use std::cmp::Ordering::{self, Less, Equal, Greater};
-use std::mem;
-use std::iter::repeat;
-use std::slice::SliceExt;
-use compile::{
- Program,
- Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd, EmptyWordBoundary,
- Save, Jump, Split,
-};
-use parse::{FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL, FLAG_NEGATED};
-use unicode::regex::PERLW;
-
-pub type CaptureLocs = Vec<Option<uint>>;
-
-/// Indicates the type of match to be performed by the VM.
-#[derive(Copy)]
-pub enum MatchKind {
- /// Only checks if a match exists or not. Does not return location.
- Exists,
- /// Returns the start and end indices of the entire match in the input
- /// given.
- Location,
- /// Returns the start and end indices of each submatch in the input given.
- Submatches,
-}
-
-/// Runs an NFA simulation on the compiled expression given on the search text
-/// `input`. The search begins at byte index `start` and ends at byte index
-/// `end`. (The range is specified here so that zero-width assertions will work
-/// correctly when searching for successive non-overlapping matches.)
-///
-/// The `which` parameter indicates what kind of capture information the caller
-/// wants. There are three choices: match existence only, the location of the
-/// entire match or the locations of the entire match in addition to the
-/// locations of each submatch.
-pub fn run<'r, 't>(which: MatchKind, prog: &'r Program, input: &'t str,
- start: uint, end: uint) -> CaptureLocs {
- Nfa {
- which: which,
- prog: prog,
- input: input,
- start: start,
- end: end,
- ic: 0,
- chars: CharReader::new(input),
- }.run()
-}
-
-struct Nfa<'r, 't> {
- which: MatchKind,
- prog: &'r Program,
- input: &'t str,
- start: uint,
- end: uint,
- ic: uint,
- chars: CharReader<'t>,
-}
-
-/// Indicates the next action to take after a single non-empty instruction
-/// is processed.
-#[derive(Copy)]
-pub enum StepState {
- /// This is returned if and only if a Match instruction is reached and
- /// we only care about the existence of a match. It instructs the VM to
- /// quit early.
- StepMatchEarlyReturn,
- /// Indicates that a match was found. Thus, the rest of the states in the
- /// *current* queue should be dropped (i.e., leftmost-first semantics).
- /// States in the "next" queue can still be processed.
- StepMatch,
- /// No match was found. Continue with the next state in the queue.
- StepContinue,
-}
-
-impl<'r, 't> Nfa<'r, 't> {
- fn run(&mut self) -> CaptureLocs {
- let ncaps = match self.which {
- Exists => 0,
- Location => 1,
- Submatches => self.prog.num_captures(),
- };
- let mut matched = false;
- let ninsts = self.prog.insts.len();
- let mut clist = &mut Threads::new(self.which, ninsts, ncaps);
- let mut nlist = &mut Threads::new(self.which, ninsts, ncaps);
-
- let mut groups: Vec<_> = repeat(None).take(ncaps * 2).collect();
-
- // Determine if the expression starts with a '^' so we can avoid
- // simulating .*?
- // Make sure multi-line mode isn't enabled for it, otherwise we can't
- // drop the initial .*?
- let prefix_anchor =
- match self.prog.insts[1] {
- EmptyBegin(flags) if flags & FLAG_MULTI == 0 => true,
- _ => false,
- };
-
- self.ic = self.start;
- let mut next_ic = self.chars.set(self.start);
- while self.ic <= self.end {
- if clist.size == 0 {
- // We have a match and we're done exploring alternatives.
- // Time to quit.
- if matched {
- break
- }
-
- // If there are no threads to try, then we'll have to start
- // over at the beginning of the regex.
- // BUT, if there's a literal prefix for the program, try to
- // jump ahead quickly. If it can't be found, then we can bail
- // out early.
- if self.prog.prefix.len() > 0 && clist.size == 0 {
- let needle = self.prog.prefix.as_bytes();
- let haystack = &self.input.as_bytes()[self.ic..];
- match find_prefix(needle, haystack) {
- None => break,
- Some(i) => {
- self.ic += i;
- next_ic = self.chars.set(self.ic);
- }
- }
- }
- }
-
- // This simulates a preceding '.*?' for every regex by adding
- // a state starting at the current position in the input for the
- // beginning of the program only if we don't already have a match.
- if clist.size == 0 || (!prefix_anchor && !matched) {
- self.add(clist, 0, groups.as_mut_slice())
- }
-
- // Now we try to read the next character.
- // As a result, the 'step' method will look at the previous
- // character.
- self.ic = next_ic;
- next_ic = self.chars.advance();
-
- for i in range(0, clist.size) {
- let pc = clist.pc(i);
- let step_state = self.step(groups.as_mut_slice(), nlist,
- clist.groups(i), pc);
- match step_state {
- StepMatchEarlyReturn => return vec![Some(0), Some(0)],
- StepMatch => { matched = true; break },
- StepContinue => {},
- }
- }
- mem::swap(&mut clist, &mut nlist);
- nlist.empty();
- }
- match self.which {
- Exists if matched => vec![Some(0), Some(0)],
- Exists => vec![None, None],
- Location | Submatches => groups,
- }
- }
-
- fn step(&self, groups: &mut [Option<uint>], nlist: &mut Threads,
- caps: &mut [Option<uint>], pc: uint)
- -> StepState {
- match self.prog.insts[pc] {
- Match => {
- match self.which {
- Exists => {
- return StepMatchEarlyReturn
- }
- Location => {
- groups[0] = caps[0];
- groups[1] = caps[1];
- return StepMatch
- }
- Submatches => {
- for (slot, val) in groups.iter_mut().zip(caps.iter()) {
- *slot = *val;
- }
- return StepMatch
- }
- }
- }
- OneChar(c, flags) => {
- if self.char_eq(flags & FLAG_NOCASE > 0, self.chars.prev, c) {
- self.add(nlist, pc+1, caps);
- }
- }
- CharClass(ref ranges, flags) => {
- if self.chars.prev.is_some() {
- let c = self.chars.prev.unwrap();
- let negate = flags & FLAG_NEGATED > 0;
- let casei = flags & FLAG_NOCASE > 0;
- let found = ranges.as_slice();
- let found = found.binary_search_by(|&rc| class_cmp(casei, c, rc)).is_ok();
- if found ^ negate {
- self.add(nlist, pc+1, caps);
- }
- }
- }
- Any(flags) => {
- if flags & FLAG_DOTNL > 0
- || !self.char_eq(false, self.chars.prev, '\n') {
- self.add(nlist, pc+1, caps)
- }
- }
- EmptyBegin(_) | EmptyEnd(_) | EmptyWordBoundary(_)
- | Save(_) | Jump(_) | Split(_, _) => {},
- }
- StepContinue
- }
-
- fn add(&self, nlist: &mut Threads, pc: uint, groups: &mut [Option<uint>]) {
- if nlist.contains(pc) {
- return
- }
- // We have to add states to the threads list even if their empty.
- // TL;DR - It prevents cycles.
- // If we didn't care about cycles, we'd *only* add threads that
- // correspond to non-jumping instructions (OneChar, Any, Match, etc.).
- // But, it's possible for valid regexs (like '(a*)*') to result in
- // a cycle in the instruction list. e.g., We'll keep chasing the Split
- // instructions forever.
- // So we add these instructions to our thread queue, but in the main
- // VM loop, we look for them but simply ignore them.
- // Adding them to the queue prevents them from being revisited so we
- // can avoid cycles (and the inevitable stack overflow).
- //
- // We make a minor optimization by indicating that the state is "empty"
- // so that its capture groups are not filled in.
- match self.prog.insts[pc] {
- EmptyBegin(flags) => {
- let multi = flags & FLAG_MULTI > 0;
- nlist.add(pc, groups, true);
- if self.chars.is_begin()
- || (multi && self.char_is(self.chars.prev, '\n')) {
- self.add(nlist, pc + 1, groups)
- }
- }
- EmptyEnd(flags) => {
- let multi = flags & FLAG_MULTI > 0;
- nlist.add(pc, groups, true);
- if self.chars.is_end()
- || (multi && self.char_is(self.chars.cur, '\n')) {
- self.add(nlist, pc + 1, groups)
- }
- }
- EmptyWordBoundary(flags) => {
- nlist.add(pc, groups, true);
- if self.chars.is_word_boundary() == !(flags & FLAG_NEGATED > 0) {
- self.add(nlist, pc + 1, groups)
- }
- }
- Save(slot) => {
- nlist.add(pc, groups, true);
- match self.which {
- Location if slot <= 1 => {
- let old = groups[slot];
- groups[slot] = Some(self.ic);
- self.add(nlist, pc + 1, groups);
- groups[slot] = old;
- }
- Submatches => {
- let old = groups[slot];
- groups[slot] = Some(self.ic);
- self.add(nlist, pc + 1, groups);
- groups[slot] = old;
- }
- Exists | Location => self.add(nlist, pc + 1, groups),
- }
- }
- Jump(to) => {
- nlist.add(pc, groups, true);
- self.add(nlist, to, groups)
- }
- Split(x, y) => {
- nlist.add(pc, groups, true);
- self.add(nlist, x, groups);
- self.add(nlist, y, groups);
- }
- Match | OneChar(_, _) | CharClass(_, _) | Any(_) => {
- nlist.add(pc, groups, false);
- }
- }
- }
-
- // FIXME: For case insensitive comparisons, it uses the uppercase
- // character and tests for equality. IIUC, this does not generalize to
- // all of Unicode. I believe we need to check the entire fold for each
- // character. This will be easy to add if and when it gets added to Rust's
- // standard library.
- #[inline]
- fn char_eq(&self, casei: bool, textc: Option<char>, regc: char) -> bool {
- match textc {
- None => false,
- Some(textc) => {
- regc == textc
- || (casei && regc.to_uppercase() == textc.to_uppercase())
- }
- }
- }
-
- #[inline]
- fn char_is(&self, textc: Option<char>, regc: char) -> bool {
- textc == Some(regc)
- }
-}
-
-/// CharReader is responsible for maintaining a "previous" and a "current"
-/// character. This one-character lookahead is necessary for assertions that
-/// look one character before or after the current position.
-pub struct CharReader<'t> {
- /// The previous character read. It is None only when processing the first
- /// character of the input.
- pub prev: Option<char>,
- /// The current character.
- pub cur: Option<char>,
- input: &'t str,
- next: uint,
-}
-
-impl<'t> CharReader<'t> {
- /// Returns a new CharReader that advances through the input given.
- /// Note that a CharReader has no knowledge of the range in which to search
- /// the input.
- pub fn new(input: &'t str) -> CharReader<'t> {
- CharReader {
- prev: None,
- cur: None,
- input: input,
- next: 0,
- }
- }
-
- /// Sets the previous and current character given any arbitrary byte
- /// index (at a Unicode codepoint boundary).
- #[inline]
- pub fn set(&mut self, ic: uint) -> uint {
- self.prev = None;
- self.cur = None;
- self.next = 0;
-
- if self.input.len() == 0 {
- return 1
- }
- if ic > 0 {
- let i = cmp::min(ic, self.input.len());
- let prev = self.input.char_range_at_reverse(i);
- self.prev = Some(prev.ch);
- }
- if ic < self.input.len() {
- let cur = self.input.char_range_at(ic);
- self.cur = Some(cur.ch);
- self.next = cur.next;
- self.next
- } else {
- self.input.len() + 1
- }
- }
-
- /// Does the same as `set`, except it always advances to the next
- /// character in the input (and therefore does half as many UTF8 decodings).
- #[inline]
- pub fn advance(&mut self) -> uint {
- self.prev = self.cur;
- if self.next < self.input.len() {
- let cur = self.input.char_range_at(self.next);
- self.cur = Some(cur.ch);
- self.next = cur.next;
- } else {
- self.cur = None;
- self.next = self.input.len() + 1;
- }
- self.next
- }
-
- /// Returns true if and only if this is the beginning of the input
- /// (ignoring the range of the input to search).
- #[inline]
- pub fn is_begin(&self) -> bool { self.prev.is_none() }
-
- /// Returns true if and only if this is the end of the input
- /// (ignoring the range of the input to search).
- #[inline]
- pub fn is_end(&self) -> bool { self.cur.is_none() }
-
- /// Returns true if and only if the current position is a word boundary.
- /// (Ignoring the range of the input to search.)
- pub fn is_word_boundary(&self) -> bool {
- if self.is_begin() {
- return is_word(self.cur)
- }
- if self.is_end() {
- return is_word(self.prev)
- }
- (is_word(self.cur) && !is_word(self.prev))
- || (is_word(self.prev) && !is_word(self.cur))
- }
-}
-
-struct Thread {
- pc: uint,
- groups: Vec<Option<uint>>,
-}
-
-struct Threads {
- which: MatchKind,
- queue: Vec<Thread>,
- sparse: Vec<uint>,
- size: uint,
-}
-
-impl Threads {
- // This is using a wicked neat trick to provide constant time lookup
- // for threads in the queue using a sparse set. A queue of threads is
- // allocated once with maximal size when the VM initializes and is reused
- // throughout execution. That is, there should be zero allocation during
- // the execution of a VM.
- //
- // See http://research.swtch.com/sparse for the deets.
- fn new(which: MatchKind, num_insts: uint, ncaps: uint) -> Threads {
- Threads {
- which: which,
- queue: range(0, num_insts).map(|_| {
- Thread { pc: 0, groups: repeat(None).take(ncaps * 2).collect() }
- }).collect(),
- sparse: repeat(0u).take(num_insts).collect(),
- size: 0,
- }
- }
-
- fn add(&mut self, pc: uint, groups: &[Option<uint>], empty: bool) {
- let t = &mut self.queue[self.size];
- t.pc = pc;
- match (empty, self.which) {
- (_, Exists) | (true, _) => {},
- (false, Location) => {
- t.groups[0] = groups[0];
- t.groups[1] = groups[1];
- }
- (false, Submatches) => {
- for (slot, val) in t.groups.iter_mut().zip(groups.iter()) {
- *slot = *val;
- }
- }
- }
- self.sparse[pc] = self.size;
- self.size += 1;
- }
-
- #[inline]
- fn contains(&self, pc: uint) -> bool {
- let s = self.sparse[pc];
- s < self.size && self.queue[s].pc == pc
- }
-
- #[inline]
- fn empty(&mut self) {
- self.size = 0;
- }
-
- #[inline]
- fn pc(&self, i: uint) -> uint {
- self.queue[i].pc
- }
-
- #[inline]
- fn groups<'r>(&'r mut self, i: uint) -> &'r mut [Option<uint>] {
- let q = &mut self.queue[i];
- q.groups.as_mut_slice()
- }
-}
-
-/// Returns true if the character is a word character, according to the
-/// (Unicode friendly) Perl character class '\w'.
-/// Note that this is only use for testing word boundaries. The actual '\w'
-/// is encoded as a CharClass instruction.
-pub fn is_word(c: Option<char>) -> bool {
- let c = match c {
- None => return false,
- Some(c) => c,
- };
- // Try the common ASCII case before invoking binary search.
- match c {
- '_' | '0' ... '9' | 'a' ... 'z' | 'A' ... 'Z' => true,
- _ => PERLW.binary_search_by(|&(start, end)| {
- if c >= start && c <= end {
- Equal
- } else if start > c {
- Greater
- } else {
- Less
- }
- }).is_ok()
- }
-}
-
-/// Given a character and a single character class range, return an ordering
-/// indicating whether the character is less than the start of the range,
-/// in the range (inclusive) or greater than the end of the range.
-///
-/// If `casei` is `true`, then this ordering is computed case insensitively.
-///
-/// This function is meant to be used with a binary search.
-#[inline]
-fn class_cmp(casei: bool, mut textc: char,
- (mut start, mut end): (char, char)) -> Ordering {
- if casei {
- // FIXME: This is pretty ridiculous. All of this case conversion
- // can be moved outside this function:
- // 1) textc should be uppercased outside the bsearch.
- // 2) the character class itself should be uppercased either in the
- // parser or the compiler.
- // FIXME: This is too simplistic for correct Unicode support.
- // See also: char_eq
- textc = textc.to_uppercase();
- start = start.to_uppercase();
- end = end.to_uppercase();
- }
- if textc >= start && textc <= end {
- Equal
- } else if start > textc {
- Greater
- } else {
- Less
- }
-}
-
-/// Returns the starting location of `needle` in `haystack`.
-/// If `needle` is not in `haystack`, then `None` is returned.
-///
-/// Note that this is using a naive substring algorithm.
-#[inline]
-pub fn find_prefix(needle: &[u8], haystack: &[u8]) -> Option<uint> {
- let (hlen, nlen) = (haystack.len(), needle.len());
- if nlen > hlen || nlen == 0 {
- return None
- }
- for (offset, window) in haystack.windows(nlen).enumerate() {
- if window == needle {
- return Some(offset)
- }
- }
- None
-}
extern crate getopts;
extern crate graphviz;
extern crate libc;
-extern crate regex;
extern crate rustc_llvm;
extern crate rustc_back;
extern crate serialize;
use session::search_paths::PathKind;
use util::nodemap::NodeMap;
-use regex::Regex;
-
use syntax::ast::NodeId;
use syntax::codemap::Span;
use syntax::diagnostic::{self, Emitter};
!msg.contains("structure constructor specifies a structure of type") {
return None
}
-
- let first = Regex::new(r"[( ]expected").unwrap();
- let second = Regex::new(r" found").unwrap();
- let third = Regex::new(
- r"\((values differ|lifetime|cyclic type of infinite size)").unwrap();
+ let first = msg.match_indices("expected").filter(|s| {
+ s.0 > 0 && (msg.char_at_reverse(s.0) == ' ' ||
+ msg.char_at_reverse(s.0) == '(')
+ }).map(|(a, b)| (a - 1, b));
+ let second = msg.match_indices("found").filter(|s| {
+ msg.char_at_reverse(s.0) == ' '
+ }).map(|(a, b)| (a - 1, b));
let mut new_msg = String::new();
let mut head = 0u;
// Insert `\n` before expected and found.
- for (pos1, pos2) in first.find_iter(msg).zip(
- second.find_iter(msg)) {
+ for (pos1, pos2) in first.zip(second) {
new_msg = new_msg +
- // A `(` may be preceded by a space and it should be trimmed
- msg[head..pos1.0].trim_right() + // prefix
- "\n" + // insert before first
- &msg[pos1.0..pos1.1] + // insert what first matched
- &msg[pos1.1..pos2.0] + // between matches
- "\n " + // insert before second
- // 123
- // `expected` is 3 char longer than `found`. To align the types, `found` gets
- // 3 spaces prepended.
- &msg[pos2.0..pos2.1]; // insert what second matched
+ // A `(` may be preceded by a space and it should be trimmed
+ msg[head..pos1.0].trim_right() + // prefix
+ "\n" + // insert before first
+ &msg[pos1.0..pos1.1] + // insert what first matched
+ &msg[pos1.1..pos2.0] + // between matches
+ "\n " + // insert before second
+ // 123
+ // `expected` is 3 char longer than `found`. To align the types,
+ // `found` gets 3 spaces prepended.
+ &msg[pos2.0..pos2.1]; // insert what second matched
head = pos2.1;
}
let mut tail = &msg[head..];
+ let third = tail.find_str("(values differ")
+ .or(tail.find_str("(lifetime"))
+ .or(tail.find_str("(cyclic type of infinite size"));
// Insert `\n` before any remaining messages which match.
- for pos in third.find_iter(tail).take(1) {
- // The end of the message may just be wrapped in `()` without `expected`/`found`.
- // Push this also to a new line and add the final tail after.
+ if let Some(pos) = third {
+ // The end of the message may just be wrapped in `()` without
+ // `expected`/`found`. Push this also to a new line and add the
+ // final tail after.
new_msg = new_msg +
- // `(` is usually preceded by a space and should be trimmed.
- tail[..pos.0].trim_right() + // prefix
- "\n" + // insert before paren
- &tail[pos.0..]; // append the tail
+ // `(` is usually preceded by a space and should be trimmed.
+ tail[..pos].trim_right() + // prefix
+ "\n" + // insert before paren
+ &tail[pos..]; // append the tail
tail = "";
}
new_msg.push_str(tail);
-
- return Some(new_msg)
+ return Some(new_msg);
}
pub fn build_session(sopts: config::Options,
#![allow(unstable)]
extern crate getopts;
-extern crate regex;
extern crate serialize;
extern crate "serialize" as rustc_serialize;
extern crate term;
use stats::Stats;
use getopts::{OptGroup, optflag, optopt};
-use regex::Regex;
use serialize::Encodable;
use term::Terminal;
use term::color::{Color, RED, YELLOW, GREEN, CYAN};
}
pub struct TestOpts {
- pub filter: Option<Regex>,
+ pub filter: Option<String>,
pub run_ignored: bool,
pub run_tests: bool,
pub run_benchmarks: bool,
if matches.opt_present("h") { usage(args[0].as_slice()); return None; }
let filter = if matches.free.len() > 0 {
- let s = matches.free[0].as_slice();
- match Regex::new(s) {
- Ok(re) => Some(re),
- Err(e) => return Some(Err(format!("could not parse /{}/: {:?}", s, e)))
- }
+ Some(matches.free[0].clone())
} else {
None
};
// Remove tests that don't match the test filter
filtered = match opts.filter {
None => filtered,
- Some(ref re) => {
- filtered.into_iter()
- .filter(|test| re.is_match(test.desc.name.as_slice())).collect()
+ Some(ref filter) => {
+ filtered.into_iter().filter(|test| {
+ test.desc.name.as_slice().contains(&filter[])
+ }).collect()
}
};
assert!(res == TrFailed);
}
- #[test]
- fn first_free_arg_should_be_a_filter() {
- let args = vec!("progname".to_string(), "some_regex_filter".to_string());
- let opts = match parse_opts(args.as_slice()) {
- Some(Ok(o)) => o,
- _ => panic!("Malformed arg in first_free_arg_should_be_a_filter")
- };
- assert!(opts.filter.expect("should've found filter").is_match("some_regex_filter"))
- }
-
#[test]
fn parse_ignored_flag() {
let args = vec!("progname".to_string(),
}
}
- #[test]
- pub fn filter_tests_regex() {
- let mut opts = TestOpts::new();
- opts.filter = Some(::regex::Regex::new("a.*b.+c").unwrap());
-
- let mut names = ["yes::abXc", "yes::aXXXbXXXXc",
- "no::XYZ", "no::abc"];
- names.sort();
-
- fn test_fn() {}
- let tests = names.iter().map(|name| {
- TestDescAndFn {
- desc: TestDesc {
- name: DynTestName(name.to_string()),
- ignore: false,
- should_fail: ShouldFail::No,
- },
- testfn: DynTestFn(Thunk::new(test_fn))
- }
- }).collect();
- let filtered = filter_tests(&opts, tests);
-
- let expected: Vec<&str> =
- names.iter().map(|&s| s).filter(|name| name.starts_with("yes")).collect();
-
- assert_eq!(filtered.len(), expected.len());
- for (test, expected_name) in filtered.iter().zip(expected.iter()) {
- assert_eq!(test.desc.name.as_slice(), *expected_name);
- }
- }
-
#[test]
pub fn test_metricmap_compare() {
let mut m1 = MetricMap::new();
use std::io::BufferedReader;
use std::iter;
use std::iter::AdditiveIterator;
-use regex::Regex;
pub struct BookItem {
pub title: String,
}
}
- let regex = r"(?P<indent>[\t ]*)\*[:space:]*\[(?P<title>.*)\]\((?P<path>.*)\)";
- let item_re = Regex::new(regex).unwrap();
let mut top_items = vec!();
let mut stack = vec!();
let mut errors = vec!();
}
};
- item_re.captures(&line[]).map(|cap| {
- let given_path = cap.name("path");
- let title = cap.name("title").unwrap().to_string();
-
- let path_from_root = match src.join(given_path.unwrap()).path_relative_from(src) {
- Some(p) => p,
- None => {
- errors.push(format!("paths in SUMMARY.md must be relative, \
- but path '{}' for section '{}' is not.",
- given_path.unwrap(), title));
- Path::new("")
- }
- };
- let path_to_root = Path::new(iter::repeat("../")
- .take(path_from_root.components().count() - 1)
- .collect::<String>());
- let item = BookItem {
- title: title,
- path: path_from_root,
- path_to_root: path_to_root,
- children: vec!(),
- };
- let level = cap.name("indent").unwrap().chars().map(|c| {
- match c {
- ' ' => 1us,
- '\t' => 4,
- _ => unreachable!()
- }
- }).sum() / 4 + 1;
-
- if level > stack.len() + 1 {
- errors.push(format!("section '{}' is indented too deeply; \
- found {}, expected {} or less",
- item.title, level, stack.len() + 1));
- } else if level <= stack.len() {
- collapse(&mut stack, &mut top_items, level);
+ let star_idx = match line.find_str("*") { Some(i) => i, None => continue };
+
+ let start_bracket = star_idx + line[star_idx..].find_str("[").unwrap();
+ let end_bracket = start_bracket + line[start_bracket..].find_str("](").unwrap();
+ let start_paren = end_bracket + 1;
+ let end_paren = start_paren + line[start_paren..].find_str(")").unwrap();
+
+ let given_path = &line[start_paren + 1 .. end_paren];
+ let title = line[start_bracket + 1..end_bracket].to_string();
+ let indent = &line[..star_idx];
+
+ let path_from_root = match src.join(given_path).path_relative_from(src) {
+ Some(p) => p,
+ None => {
+ errors.push(format!("paths in SUMMARY.md must be relative, \
+ but path '{}' for section '{}' is not.",
+ given_path, title));
+ Path::new("")
}
- stack.push(item)
- });
+ };
+ let path_to_root = Path::new(iter::repeat("../")
+ .take(path_from_root.components().count() - 1)
+ .collect::<String>());
+ let item = BookItem {
+ title: title,
+ path: path_from_root,
+ path_to_root: path_to_root,
+ children: vec!(),
+ };
+ let level = indent.chars().map(|c| {
+ match c {
+ ' ' => 1us,
+ '\t' => 4,
+ _ => unreachable!()
+ }
+ }).sum() / 4 + 1;
+
+ if level > stack.len() + 1 {
+ errors.push(format!("section '{}' is indented too deeply; \
+ found {}, expected {} or less",
+ item.title, level, stack.len() + 1));
+ } else if level <= stack.len() {
+ collapse(&mut stack, &mut top_items, level);
+ }
+ stack.push(item)
}
if errors.is_empty() {
use css;
use javascript;
-use regex::Regex;
-
use rustdoc;
struct Build;
let out_path = tgt.join(item.path.dirname());
- let regex = r"\[(?P<title>[^]]*)\]\((?P<url_stem>[^)]*)\.(?P<ext>md|markdown)\)";
- let md_urls = Regex::new(regex).unwrap();
-
let src;
if os::args().len() < 3 {
src = os::getcwd().unwrap().clone();
let markdown_data = try!(File::open(&src.join(&item.path)).read_to_string());
let preprocessed_path = tmp.path().join(item.path.filename().unwrap());
{
- let urls = md_urls.replace_all(&markdown_data[], "[$title]($url_stem.html)");
+ let urls = markdown_data.replace(".md)", ".html)");
try!(File::create(&preprocessed_path)
.write_str(&urls[]));
}
#![feature(slicing_syntax, box_syntax)]
#![allow(unstable)]
-extern crate regex;
-
extern crate rustdoc;
use std::os;
+++ /dev/null
-// The Computer Language Benchmarks Game
-// http://benchmarksgame.alioth.debian.org/
-//
-// contributed by the Rust Project Developers
-
-// Copyright (c) 2014 The Rust Project Developers
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//
-// - Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in
-// the documentation and/or other materials provided with the
-// distribution.
-//
-// - Neither the name of "The Computer Language Benchmarks Game" nor
-// the name of "The Computer Language Shootout Benchmarks" nor the
-// names of its contributors may be used to endorse or promote
-// products derived from this software without specific prior
-// written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-// OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ignore-stage1
-// ignore-cross-compile #12102
-
-#![feature(box_syntax)]
-
-extern crate regex;
-
-use std::io;
-use regex::{NoExpand, Regex};
-use std::sync::{Arc, Future};
-
-macro_rules! regex {
- ($e:expr) => (Regex::new($e).unwrap())
-}
-
-fn count_matches(seq: &str, variant: &Regex) -> int {
- let mut n = 0;
- for _ in variant.find_iter(seq) {
- n += 1;
- }
- n
-}
-
-fn main() {
- let mut rdr = if std::os::getenv("RUST_BENCH").is_some() {
- let fd = io::File::open(&Path::new("shootout-k-nucleotide.data"));
- box io::BufferedReader::new(fd) as Box<io::Reader>
- } else {
- box io::stdin() as Box<io::Reader>
- };
- let mut seq = rdr.read_to_string().unwrap();
- let ilen = seq.len();
-
- seq = regex!(">[^\n]*\n|\n").replace_all(seq.as_slice(), NoExpand(""));
- let seq_arc = Arc::new(seq.clone()); // copy before it moves
- let clen = seq.len();
-
- let mut seqlen = Future::spawn(move|| {
- let substs = vec![
- (regex!("B"), "(c|g|t)"),
- (regex!("D"), "(a|g|t)"),
- (regex!("H"), "(a|c|t)"),
- (regex!("K"), "(g|t)"),
- (regex!("M"), "(a|c)"),
- (regex!("N"), "(a|c|g|t)"),
- (regex!("R"), "(a|g)"),
- (regex!("S"), "(c|g)"),
- (regex!("V"), "(a|c|g)"),
- (regex!("W"), "(a|t)"),
- (regex!("Y"), "(c|t)"),
- ];
- let mut seq = seq;
- for (re, replacement) in substs.into_iter() {
- seq = re.replace_all(seq.as_slice(), NoExpand(replacement));
- }
- seq.len()
- });
-
- let variants = vec![
- regex!("agggtaaa|tttaccct"),
- regex!("[cgt]gggtaaa|tttaccc[acg]"),
- regex!("a[act]ggtaaa|tttacc[agt]t"),
- regex!("ag[act]gtaaa|tttac[agt]ct"),
- regex!("agg[act]taaa|ttta[agt]cct"),
- regex!("aggg[acg]aaa|ttt[cgt]ccct"),
- regex!("agggt[cgt]aa|tt[acg]accct"),
- regex!("agggta[cgt]a|t[acg]taccct"),
- regex!("agggtaa[cgt]|[acg]ttaccct"),
- ];
- let (mut variant_strs, mut counts) = (vec!(), vec!());
- for variant in variants.into_iter() {
- let seq_arc_copy = seq_arc.clone();
- variant_strs.push(variant.to_string());
- counts.push(Future::spawn(move|| {
- count_matches(seq_arc_copy.as_slice(), &variant)
- }));
- }
-
- for (i, variant) in variant_strs.iter().enumerate() {
- println!("{} {}", variant, counts[i].get());
- }
- println!("");
- println!("{}", ilen);
- println!("{}", clen);
- println!("{}", seqlen.get());
-}
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-// exec-env:RUST_LOG=rust-log-filter/f.o
+// exec-env:RUST_LOG=rust-log-filter/foo
#![allow(unknown_features)]
#![feature(box_syntax)]
let _t = Thread::spawn(move|| {
log::set_logger(logger);
- // our regex is "f.o"
- // ensure it is a regex, and isn't anchored
info!("foo");
info!("bar");
info!("foo bar");
info!("bar foo");
- info!("f1o");
});
assert_eq!(rx.recv().unwrap().as_slice(), "foo");
assert_eq!(rx.recv().unwrap().as_slice(), "foo bar");
assert_eq!(rx.recv().unwrap().as_slice(), "bar foo");
- assert_eq!(rx.recv().unwrap().as_slice(), "f1o");
assert!(rx.recv().is_err());
}