#![allow(visible_private_types)]
use std::cmp;
-use std::iter;
use parse;
use parse::{
Flags, FLAG_EMPTY,
impl Program {
/// Compiles a Regex given its AST.
- pub fn new(ast: ~parse::Ast) -> (Program, ~[Option<~str>]) {
+ pub fn new(ast: parse::Ast) -> (Program, Vec<Option<~str>>) {
let mut c = Compiler {
insts: Vec::with_capacity(100),
names: Vec::with_capacity(10),
// This is a bit hacky since we have to skip over the initial
// 'Save' instruction.
let mut pre = StrBuf::with_capacity(5);
- for i in iter::range(1, c.insts.len()) {
- match *c.insts.get(i) {
+ for inst in c.insts.slice_from(1).iter() {
+ match *inst {
OneChar(c, FLAG_EMPTY) => pre.push_char(c),
_ => break
}
}
- let names = c.names.as_slice().into_owned();
+ let Compiler { insts, names } = c;
let prog = Program {
- insts: c.insts,
+ insts: insts,
prefix: pre.into_owned(),
};
(prog, names)
// The only tricky thing here is patching jump/split instructions to point to
// the right instruction.
impl<'r> Compiler<'r> {
- fn compile(&mut self, ast: ~parse::Ast) {
+ fn compile(&mut self, ast: parse::Ast) {
match ast {
- ~Nothing => {},
- ~Literal(c, flags) => self.push(OneChar(c, flags)),
- ~Dot(nl) => self.push(Any(nl)),
- ~Class(ranges, flags) =>
+ Nothing => {},
+ Literal(c, flags) => self.push(OneChar(c, flags)),
+ Dot(nl) => self.push(Any(nl)),
+ Class(ranges, flags) =>
self.push(CharClass(ranges, flags)),
- ~Begin(flags) => self.push(EmptyBegin(flags)),
- ~End(flags) => self.push(EmptyEnd(flags)),
- ~WordBoundary(flags) => self.push(EmptyWordBoundary(flags)),
- ~Capture(cap, name, x) => {
+ Begin(flags) => self.push(EmptyBegin(flags)),
+ End(flags) => self.push(EmptyEnd(flags)),
+ WordBoundary(flags) => self.push(EmptyWordBoundary(flags)),
+ Capture(cap, name, x) => {
let len = self.names.len();
if cap >= len {
self.names.grow(10 + cap - len, &None)
*self.names.get_mut(cap) = name;
self.push(Save(2 * cap));
- self.compile(x);
+ self.compile(*x);
self.push(Save(2 * cap + 1));
}
- ~Cat(xs) => {
+ Cat(xs) => {
for x in xs.move_iter() {
self.compile(x)
}
}
- ~Alt(x, y) => {
+ Alt(x, y) => {
let split = self.empty_split(); // push: split 0, 0
let j1 = self.insts.len();
- self.compile(x); // push: insts for x
+ self.compile(*x); // push: insts for x
let jmp = self.empty_jump(); // push: jmp 0
let j2 = self.insts.len();
- self.compile(y); // push: insts for y
+ self.compile(*y); // push: insts for y
let j3 = self.insts.len();
self.set_split(split, j1, j2); // split 0, 0 -> split j1, j2
self.set_jump(jmp, j3); // jmp 0 -> jmp j3
}
- ~Rep(x, ZeroOne, g) => {
+ Rep(x, ZeroOne, g) => {
let split = self.empty_split();
let j1 = self.insts.len();
- self.compile(x);
+ self.compile(*x);
let j2 = self.insts.len();
if g.is_greedy() {
self.set_split(split, j2, j1);
}
}
- ~Rep(x, ZeroMore, g) => {
+ Rep(x, ZeroMore, g) => {
let j1 = self.insts.len();
let split = self.empty_split();
let j2 = self.insts.len();
- self.compile(x);
+ self.compile(*x);
let jmp = self.empty_jump();
let j3 = self.insts.len();
self.set_split(split, j3, j2);
}
}
- ~Rep(x, OneMore, g) => {
+ Rep(x, OneMore, g) => {
let j1 = self.insts.len();
- self.compile(x);
+ self.compile(*x);
let split = self.empty_split();
let j2 = self.insts.len();
Capture(uint, Option<~str>, ~Ast),
// Represent concatenation as a flat vector to avoid blowing the
// stack in the compiler.
- Cat(Vec<~Ast>),
+ Cat(Vec<Ast>),
Alt(~Ast, ~Ast),
Rep(~Ast, Repeater, Greed),
}
/// state.
#[deriving(Show)]
enum BuildAst {
// A finished expression. This change stores the `Ast` by value instead of
// behind an owned `~Ast` box.
- Ast(~Ast),
+ Ast(Ast),
// Marker entry for an opening parenthesis.
// NOTE(review): the three fields look like saved flags, a capture index and
// a capture name -- confirm against the code that constructs `Paren`.
Paren(Flags, uint, ~str), // '('
// Marker entry for an alternation bar.
Bar, // '|'
}
}
}
- fn unwrap(self) -> Result<~Ast, Error> {
+ fn unwrap(self) -> Result<Ast, Error> {
match self {
Ast(x) => Ok(x),
_ => fail!("Tried to unwrap non-AST item: {}", self),
names: Vec<~str>,
}
-pub fn parse(s: &str) -> Result<~Ast, Error> {
+pub fn parse(s: &str) -> Result<Ast, Error> {
Parser {
chars: s.chars().collect(),
chari: 0,
}
impl<'a> Parser<'a> {
- fn parse(&mut self) -> Result<~Ast, Error> {
+ fn parse(&mut self) -> Result<Ast, Error> {
loop {
let c = self.cur();
match c {
// alternate and make it a capture.
if cap.is_some() {
let ast = try!(self.pop_ast());
- self.push(~Capture(cap.unwrap(), cap_name, ast));
+ self.push(Capture(cap.unwrap(), cap_name, ~ast));
}
}
'|' => {
self.chari < self.chars.len()
}
// Pops the top entry off the parser stack and returns it as an AST.
//
// Two different `unwrap` calls are chained here: the first one is applied to
// the value returned by `self.stack.pop()`, the second one to the popped stack
// entry itself (presumably `BuildAst::unwrap`, which yields a `Result` and
// fails on non-AST entries -- TODO confirm, its body is only partly visible).
// The `match` forwards that `Result` unchanged.
- fn pop_ast(&mut self) -> Result<~Ast, Error> {
+ fn pop_ast(&mut self) -> Result<Ast, Error> {
match self.stack.pop().unwrap().unwrap() {
Err(e) => Err(e),
Ok(ast) => Ok(ast),
}
}
// Wraps `ast` in the `Ast` stack-entry variant and pushes it onto the parser
// stack. After this change the expression is taken by value, not as `~Ast`.
- fn push(&mut self, ast: ~Ast) {
+ fn push(&mut self, ast: Ast) {
self.stack.push(Ast(ast))
}
}
let ast = try!(self.pop_ast());
match ast {
- ~Begin(_) | ~End(_) | ~WordBoundary(_) =>
+ Begin(_) | End(_) | WordBoundary(_) =>
return self.err(
"Repeat arguments cannot be empty width assertions."),
_ => {}
}
let greed = try!(self.get_next_greedy());
- self.push(~Rep(ast, rep, greed));
+ self.push(Rep(~ast, rep, greed));
Ok(())
}
fn push_literal(&mut self, c: char) -> Result<(), Error> {
match c {
'.' => {
- self.push(~Dot(self.flags))
+ self.push(Dot(self.flags))
}
'^' => {
- self.push(~Begin(self.flags))
+ self.push(Begin(self.flags))
}
'$' => {
- self.push(~End(self.flags))
+ self.push(End(self.flags))
}
_ => {
- self.push(~Literal(c, self.flags))
+ self.push(Literal(c, self.flags))
}
}
Ok(())
FLAG_EMPTY
};
let mut ranges: Vec<(char, char)> = vec!();
- let mut alts: Vec<~Ast> = vec!();
+ let mut alts: Vec<Ast> = vec!();
if self.peek_is(1, ']') {
try!(self.expect(']'))
match c {
'[' =>
match self.try_parse_ascii() {
- Some(~Class(asciis, flags)) => {
- alts.push(~Class(asciis, flags ^ negated));
+ Some(Class(asciis, flags)) => {
+ alts.push(Class(asciis, flags ^ negated));
continue
}
Some(ast) =>
},
'\\' => {
match try!(self.parse_escape()) {
- ~Class(asciis, flags) => {
- alts.push(~Class(asciis, flags ^ negated));
+ Class(asciis, flags) => {
+ alts.push(Class(asciis, flags ^ negated));
continue
}
- ~Literal(c2, _) => c = c2, // process below
- ~Begin(_) | ~End(_) | ~WordBoundary(_) =>
+ Literal(c2, _) => c = c2, // process below
+ Begin(_) | End(_) | WordBoundary(_) =>
return self.err(
"\\A, \\z, \\b and \\B are not valid escape \
sequences inside a character class."),
']' => {
if ranges.len() > 0 {
let flags = negated | (self.flags & FLAG_NOCASE);
- let mut ast = ~Class(combine_ranges(ranges), flags);
+ let mut ast = Class(combine_ranges(ranges), flags);
for alt in alts.move_iter() {
- ast = ~Alt(alt, ast)
+ ast = Alt(~alt, ~ast)
}
self.push(ast);
} else if alts.len() > 0 {
let mut ast = alts.pop().unwrap();
for alt in alts.move_iter() {
- ast = ~Alt(alt, ast)
+ ast = Alt(~alt, ~ast)
}
self.push(ast);
}
// and moves the parser to the final ']' character.
// If unsuccessful, no state is changed and None is returned.
// Assumes that '[' is the current character.
- fn try_parse_ascii(&mut self) -> Option<~Ast> {
+ fn try_parse_ascii(&mut self) -> Option<Ast> {
if !self.peek_is(1, ':') {
return None
}
Some(ranges) => {
self.chari = closer;
let flags = negated | (self.flags & FLAG_NOCASE);
- Some(~Class(combine_ranges(ranges), flags))
+ Some(Class(combine_ranges(ranges), flags))
}
}
}
for _ in iter::range(0, min) {
self.push(ast.clone())
}
- self.push(~Rep(ast, ZeroMore, greed));
+ self.push(Rep(~ast, ZeroMore, greed));
} else {
// Require N copies of what's on the stack and then repeat it
// up to M times optionally.
}
if max.is_some() {
for _ in iter::range(min, max.unwrap()) {
- self.push(~Rep(ast.clone(), ZeroOne, greed))
+ self.push(Rep(~ast.clone(), ZeroOne, greed))
}
}
// It's possible that we popped something off the stack but
// never put anything back on it. To keep things simple, add
// a no-op expression.
if min == 0 && (max.is_none() || max == Some(0)) {
- self.push(~Nothing)
+ self.push(Nothing)
}
}
Ok(())
// Parses all escape sequences.
// Assumes that '\' is the current character.
- fn parse_escape(&mut self) -> Result<~Ast, Error> {
+ fn parse_escape(&mut self) -> Result<Ast, Error> {
try!(self.noteof("an escape sequence following a '\\'"))
let c = self.cur();
if is_punct(c) {
- return Ok(~Literal(c, FLAG_EMPTY))
+ return Ok(Literal(c, FLAG_EMPTY))
}
match c {
- 'a' => Ok(~Literal('\x07', FLAG_EMPTY)),
- 'f' => Ok(~Literal('\x0C', FLAG_EMPTY)),
- 't' => Ok(~Literal('\t', FLAG_EMPTY)),
- 'n' => Ok(~Literal('\n', FLAG_EMPTY)),
- 'r' => Ok(~Literal('\r', FLAG_EMPTY)),
- 'v' => Ok(~Literal('\x0B', FLAG_EMPTY)),
- 'A' => Ok(~Begin(FLAG_EMPTY)),
- 'z' => Ok(~End(FLAG_EMPTY)),
- 'b' => Ok(~WordBoundary(FLAG_EMPTY)),
- 'B' => Ok(~WordBoundary(FLAG_NEGATED)),
+ 'a' => Ok(Literal('\x07', FLAG_EMPTY)),
+ 'f' => Ok(Literal('\x0C', FLAG_EMPTY)),
+ 't' => Ok(Literal('\t', FLAG_EMPTY)),
+ 'n' => Ok(Literal('\n', FLAG_EMPTY)),
+ 'r' => Ok(Literal('\r', FLAG_EMPTY)),
+ 'v' => Ok(Literal('\x0B', FLAG_EMPTY)),
+ 'A' => Ok(Begin(FLAG_EMPTY)),
+ 'z' => Ok(End(FLAG_EMPTY)),
+ 'b' => Ok(WordBoundary(FLAG_EMPTY)),
+ 'B' => Ok(WordBoundary(FLAG_NEGATED)),
'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => Ok(try!(self.parse_octal())),
'x' => Ok(try!(self.parse_hex())),
'p' | 'P' => Ok(try!(self.parse_unicode_name())),
let ranges = perl_unicode_class(c);
let mut flags = self.flags & FLAG_NOCASE;
if c.is_uppercase() { flags |= FLAG_NEGATED }
- Ok(~Class(ranges, flags))
+ Ok(Class(ranges, flags))
}
_ => self.err(format!("Invalid escape sequence '\\\\{}'", c)),
}
// name is the unicode class name.
// Assumes that \p or \P has been read (and 'p' or 'P' is the current
// character).
- fn parse_unicode_name(&mut self) -> Result<~Ast, Error> {
+ fn parse_unicode_name(&mut self) -> Result<Ast, Error> {
let negated = if self.cur() == 'P' { FLAG_NEGATED } else { FLAG_EMPTY };
let mut name: ~str;
if self.peek_is(1, '{') {
None => return self.err(format!(
"Could not find Unicode class '{}'", name)),
Some(ranges) => {
- Ok(~Class(ranges, negated | (self.flags & FLAG_NOCASE)))
+ Ok(Class(ranges, negated | (self.flags & FLAG_NOCASE)))
}
}
}
// Parses an octal number, up to 3 digits.
// Assumes that \n has been read, where n is the first digit.
- fn parse_octal(&mut self) -> Result<~Ast, Error> {
+ fn parse_octal(&mut self) -> Result<Ast, Error> {
let start = self.chari;
let mut end = start + 1;
let (d2, d3) = (self.peek(1), self.peek(2));
}
let s = self.slice(start, end);
match num::from_str_radix::<u32>(s, 8) {
- Some(n) => Ok(~Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
+ Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
None => self.err(format!(
"Could not parse '{}' as octal number.", s)),
}
// Parse a hex number. Either exactly two digits or anything in {}.
// Assumes that \x has been read.
- fn parse_hex(&mut self) -> Result<~Ast, Error> {
+ fn parse_hex(&mut self) -> Result<Ast, Error> {
if !self.peek_is(1, '{') {
try!(self.expect('{'))
return self.parse_hex_two()
// Assumes that \xn has been read, where n is the first digit and is the
// current character.
// After return, parser will point at the second digit.
- fn parse_hex_two(&mut self) -> Result<~Ast, Error> {
+ fn parse_hex_two(&mut self) -> Result<Ast, Error> {
let (start, end) = (self.chari, self.chari + 2);
let bad = self.slice(start - 2, self.chars.len());
try!(self.noteof(format!("Invalid hex escape sequence '{}'", bad)))
}
// Parses `s` as a hexadecimal number.
- fn parse_hex_digits(&self, s: &str) -> Result<~Ast, Error> {
+ fn parse_hex_digits(&self, s: &str) -> Result<Ast, Error> {
match num::from_str_radix::<u32>(s, 16) {
- Some(n) => Ok(~Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
+ Some(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
None => self.err(format!(
"Could not parse '{}' as hex number.", s)),
}
// thrown away). But be careful with overflow---we can't count on the
// open paren to be there.
if from > 0 { from = from - 1}
- let ast = try!(self.build_from(from, Alt));
+ let ast = try!(self.build_from(from, |l,r| Alt(~l, ~r)));
self.push(ast);
Ok(())
}
// build_from combines all AST elements starting at 'from' in the
// parser's stack using 'mk' to combine them. If any such element is not an
// AST then it is popped off the stack and ignored.
- fn build_from(&mut self, from: uint, mk: |~Ast, ~Ast| -> Ast)
- -> Result<~Ast, Error> {
+ fn build_from(&mut self, from: uint, mk: |Ast, Ast| -> Ast)
+ -> Result<Ast, Error> {
if from >= self.stack.len() {
return self.err("Empty group or alternate not allowed.")
}
while i > from {
i = i - 1;
match self.stack.pop().unwrap() {
- Ast(x) => combined = ~mk(x, combined),
+ Ast(x) => combined = mk(x, combined),
_ => {},
}
}
// Returns a concatenation of two expressions. This also guarantees that a
// `Cat` expression will never be a direct child of another `Cat` expression.
-fn concat_flatten(x: ~Ast, y: ~Ast) -> Ast {
+fn concat_flatten(x: Ast, y: Ast) -> Ast {
// Consumes both operands and always returns a `Cat`. The arms are tried in
// order:
//   1. both are `Cat`    -> the elements of `y` are moved onto the end of `x`'s vector
//   2. only `x` is `Cat` -> `y` is pushed onto the end of `x`'s vector
//   3. only `y` is `Cat` -> `x` is put at the front of `y`'s vector (`unshift`)
//   4. neither is `Cat`  -> a new two-element vector `[x, y]` is built
// In every arm the elements of `x` end up before those of `y`.
match (x, y) {
- (~Cat(mut xs), ~Cat(ys)) => { xs.push_all_move(ys); Cat(xs) }
- (~Cat(mut xs), ast) => { xs.push(ast); Cat(xs) }
- (ast, ~Cat(mut xs)) => { xs.unshift(ast); Cat(xs) }
+ (Cat(mut xs), Cat(ys)) => { xs.push_all_move(ys); Cat(xs) }
+ (Cat(mut xs), ast) => { xs.push(ast); Cat(xs) }
+ (ast, Cat(mut xs)) => { xs.unshift(ast); Cat(xs) }
(ast1, ast2) => Cat(vec!(ast1, ast2)),
}
}