// option. This file may not be copied, modified, or distributed
// except according to those terms.
-use collections::HashMap;
+use std::collections::HashMap;
use std::fmt;
use std::from_str::from_str;
use std::str::{MaybeOwned, Owned, Slice};
use vm;
use vm::{CaptureLocs, MatchKind, Exists, Location, Submatches};
-/// Escapes all regular expression meta characters in `text` so that it may be
-/// safely used in a regular expression as a literal string.
+/// Escapes all regular expression meta characters in `text`.
+///
+/// The string returned may be safely used as a literal in a regular
+/// expression.
pub fn quote(text: &str) -> String {
let mut quoted = String::with_capacity(text.len());
for c in text.chars() {
Regex::new(regex).map(|r| r.is_match(text))
}
-/// Regex is a compiled regular expression, represented as either a sequence
-/// of bytecode instructions (dynamic) or as a specialized Rust function
-/// (native). It can be used to search, split
+/// A compiled regular expression.
+///
+/// It is represented as either a sequence of bytecode instructions (dynamic)
+/// or as a specialized Rust function (native). It can be used to search, split
/// or replace text. All searching is done with an implicit `.*?` at the
/// beginning and end of an expression. To force an expression to match the
/// whole string (or a prefix or a suffix), you must use an anchor like `^` or
///
/// While this crate will handle Unicode strings (whether in the regular
/// expression or in the search text), all positions returned are **byte
-/// indices**. Every byte index is guaranteed to be at a UTF8 codepoint
+/// indices**. Every byte index is guaranteed to be at a Unicode code point
/// boundary.
///
/// The lifetimes `'r` and `'t` in this crate correspond to the lifetime of a
/// ```rust
/// #![feature(phase)]
/// extern crate regex;
-/// #[phase(syntax)] extern crate regex_macros;
+/// #[phase(plugin)] extern crate regex_macros;
///
/// fn main() {
/// let re = regex!(r"\d+");
/// documentation.
#[deriving(Clone)]
#[allow(visible_private_types)]
-pub struct Regex {
- /// The representation of `Regex` is exported to support the `regex!`
- /// syntax extension. Do not rely on it.
- ///
- /// See the comments for the `program` module in `lib.rs` for a more
- /// detailed explanation for what `regex!` requires.
+pub enum Regex {
+ // The representation of `Regex` is exported to support the `regex!`
+ // syntax extension. Do not rely on it.
+ //
+ // See the comments for the `program` module in `lib.rs` for a more
+ // detailed explanation for what `regex!` requires.
#[doc(hidden)]
- pub original: String,
+ // Expression compiled at runtime (this is what `Regex::new` builds); it is
+ // executed by handing its `Program` to `vm::run`.
+ Dynamic(ExDynamic),
#[doc(hidden)]
- pub names: Vec<Option<String>>,
+ // Expression backed by a plain function pointer that is called directly to
+ // perform the match (presumably emitted by `regex!` -- confirm in
+ // `regex_macros`).
+ Native(ExNative),
+}
+
+// Backing data for `Regex::Dynamic`.
+//
+// Owns the pattern text, the capture names and the compiled program that
+// `exec_slice` passes to `vm::run`.
+#[deriving(Clone)]
+#[doc(hidden)]
+pub struct ExDynamic {
+ // Pattern text as given to `Regex::new`; returned by `Regex::as_str`.
+ original: String,
+ // Capture names by position; `Captures::new` maps each `Some(name)` to its
+ // index and skips `None` entries.
+ names: Vec<Option<String>>,
#[doc(hidden)]
- pub p: MaybeNative,
+ // Compiled program produced by `Program::new`.
+ pub prog: Program
}
-impl fmt::Show for Regex {
- /// Shows the original regular expression.
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "{}", self.original)
- }
+// Backing data for `Regex::Native`.
+//
+// Every field is either `'static` data or a function pointer, so nothing in
+// here is owned or heap-allocated by the value itself.
+#[doc(hidden)]
+pub struct ExNative {
+ #[doc(hidden)]
+ // Pattern text; returned by `Regex::as_str`.
+ pub original: &'static str,
+ #[doc(hidden)]
+ // Capture names by position; iterated by `Regex::names_iter`.
+ pub names: &'static [Option<&'static str>],
+ #[doc(hidden)]
+ // Matcher called by `exec_slice` as `prog(which, input, s, e)`; its result
+ // is returned to the caller as the capture locations.
+ pub prog: fn(MatchKind, &str, uint, uint) -> Vec<Option<uint>>
}
-pub enum MaybeNative {
- Dynamic(Program),
- Native(fn(MatchKind, &str, uint, uint) -> Vec<Option<uint>>),
+// Cloning is a plain copy of the struct: `ExNative` holds only `'static`
+// references and a function pointer, so there is nothing to deep-copy.
+impl Clone for ExNative {
+ fn clone(&self) -> ExNative { *self }
}
-impl Clone for MaybeNative {
- fn clone(&self) -> MaybeNative {
- match *self {
- Dynamic(ref p) => Dynamic(p.clone()),
- Native(fp) => Native(fp),
- }
+// Formats a `Regex` as its original pattern text, whichever representation
+// backs it.
+impl fmt::Show for Regex {
+ /// Shows the original regular expression.
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ // `as_str` already hides the Dynamic/Native split, so no match is
+ // needed here.
+ write!(f, "{}", self.as_str())
}
}
pub fn new(re: &str) -> Result<Regex, parse::Error> {
+ // A parse failure is handed straight back to the caller by `try!`.
let ast = try!(parse::parse(re));
+ // `Program::new` yields the compiled program together with the capture
+ // names.
let (prog, names) = Program::new(ast);
- Ok(Regex {
- original: re.to_strbuf(),
- names: names, p: Dynamic(prog),
- })
+ // Expressions built here always use the `Dynamic` representation and keep
+ // an owned copy of the pattern text.
+ Ok(Dynamic(ExDynamic {
+ original: re.to_string(),
+ names: names,
+ prog: prog,
+ }))
}
/// Returns true if and only if the regex matches the string given.
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// let matched = regex!(r"\b\w{13}\b").is_match(text);
///
/// # Example
///
- /// Find the start and end location of every word with exactly 13
+ /// Find the start and end location of the first word with exactly 13
/// characters:
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// let pos = regex!(r"\b\w{13}\b").find(text);
pub fn find(&self, text: &str) -> Option<(uint, uint)> {
let caps = exec(self, Location, text);
if has_match(&caps) {
- Some((caps.get(0).unwrap(), caps.get(1).unwrap()))
+ Some((caps[0].unwrap(), caps[1].unwrap()))
} else {
None
}
///
/// # Example
///
- /// Find the start and end location of the first word with exactly 13
+ /// Find the start and end location of every word with exactly 13
/// characters:
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # fn main() {
/// let text = "Retroactively relinquishing remunerations is reprehensible.";
/// for pos in regex!(r"\b\w{13}\b").find_iter(text) {
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"'([^']+)'\s+\((\d{4})\)");
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
/// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"[ \t]+");
/// let fields: Vec<&str> = re.split("a b \t c\td e").collect();
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"\W+");
/// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect();
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!("[^01]+");
/// assert_eq!(re.replace("1078910", "").as_slice(), "1010");
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # use regex::Captures; fn main() {
/// let re = regex!(r"([^,\s]+),\s+(\S+)");
/// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
- /// format_strbuf!("{} {}", caps.at(2), caps.at(1))
+ /// format!("{} {}", caps.at(2), caps.at(1))
/// });
/// assert_eq!(result.as_slice(), "Bruce Springsteen");
/// # }
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)");
/// let result = re.replace("Springsteen, Bruce", "$first $last");
///
/// ```rust
/// # #![feature(phase)]
- /// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
+ /// # extern crate regex; #[phase(plugin)] extern crate regex_macros;
/// # fn main() {
/// use regex::NoExpand;
///
}
new.append(text.slice(last_match, text.len()))
}
+
+ /// Returns the original string of this regex.
+ ///
+ /// The returned slice borrows from `self` for both representations. It is
+ /// the same text that the `fmt::Show` implementation prints.
+ pub fn as_str<'a>(&'a self) -> &'a str {
+ match *self {
+ Dynamic(ExDynamic { ref original, .. }) => original.as_slice(),
+ Native(ExNative { ref original, .. }) => original.as_slice(),
+ }
+ }
+
+ // Iterates over the capture names of either representation.
+ //
+ // The two variants store their names with different element types, so the
+ // matching `NamesIter` variant wraps the underlying slice iterator and
+ // yields every entry as an owned `Option<String>`.
+ #[doc(hidden)]
+ #[allow(visible_private_types)]
+ #[experimental]
+ pub fn names_iter<'a>(&'a self) -> NamesIter<'a> {
+ match *self {
+ Native(ref n) => NamesIterNative(n.names.iter()),
+ Dynamic(ref d) => NamesIterDynamic(d.names.iter())
+ }
+ }
+
+ // Number of entries in the capture-name list of either representation.
+ // `Captures::new` uses it to skip building a name map when it is zero.
+ fn names_len(&self) -> uint {
+ match *self {
+ Native(ref n) => n.names.len(),
+ Dynamic(ref d) => d.names.len()
+ }
+ }
+
+}
+
+// Iterator returned by `Regex::names_iter`.
+//
+// Wraps the slice iterator of whichever name list the regex carries: static
+// string slices for `Native`, owned strings for `Dynamic`.
+enum NamesIter<'a> {
+ NamesIterNative(::std::slice::Items<'a, Option<&'static str>>),
+ NamesIterDynamic(::std::slice::Items<'a, Option<String>>)
+}
+
+// Yields one `Option<String>` per entry of the underlying name list, so both
+// representations present the same item type.
+impl<'a> Iterator<Option<String>> for NamesIter<'a> {
+ fn next(&mut self) -> Option<Option<String>> {
+ // The outer `Option` signals end of iteration; the inner one is the
+ // entry itself. Each `Some` name is converted with `to_string`, which
+ // produces a fresh `String` per item.
+ match *self {
+ NamesIterNative(ref mut i) => i.next().map(|x| x.map(|s| s.to_string())),
+ NamesIterDynamic(ref mut i) => i.next().map(|x| x.as_ref().map(|s| s.to_string())),
+ }
+ }
}
/// NoExpand indicates literal string replacement.
impl<'t> Replacer for &'t str {
fn reg_replace<'a>(&'a mut self, caps: &Captures) -> MaybeOwned<'a> {
- Owned(caps.expand(*self).into_owned())
+ // The string is treated as a template: it is expanded against the
+ // captures and the result is wrapped directly as the owned variant.
+ Owned(caps.expand(*self))
}
}
-impl<'a> Replacer for |&Captures|: 'a -> String {
- fn reg_replace<'r>(&'r mut self, caps: &Captures) -> MaybeOwned<'r> {
- Owned((*self)(caps).into_owned())
+// A closure replacer computes the replacement text from the captures of each
+// match. `'t` bounds the closure's environment; `'a` is the borrow of `self`
+// that the returned `MaybeOwned` is tied to.
+impl<'t> Replacer for |&Captures|: 't -> String {
+ fn reg_replace<'a>(&'a mut self, caps: &Captures) -> MaybeOwned<'a> {
+ // The closure already returns a `String`, so it is wrapped as-is.
+ Owned((*self)(caps))
}
}
}
impl<'t> Captures<'t> {
+ #[allow(experimental)]
fn new(re: &Regex, search: &'t str, locs: CaptureLocs)
-> Option<Captures<'t>> {
if !has_match(&locs) {
}
let named =
- if re.names.len() == 0 {
+ if re.names_len() == 0 {
None
} else {
let mut named = HashMap::new();
- for (i, name) in re.names.iter().enumerate() {
+ for (i, name) in re.names_iter().enumerate() {
match name {
- &None => {},
- &Some(ref name) => {
- named.insert(name.to_strbuf(), i);
+ None => {},
+ Some(name) => {
+ named.insert(name, i);
}
}
}
/// original string matched.
pub fn pos(&self, i: uint) -> Option<(uint, uint)> {
+ // Group `i` keeps its start at slot `2i` and its end at slot `2i + 1`.
let (s, e) = (i * 2, i * 2 + 1);
- if e >= self.locs.len() || self.locs.get(s).is_none() {
+ // `||` short-circuits, so `self.locs[s]` is only read once `e` (and
+ // therefore `s`) is known to be in range.
+ if e >= self.locs.len() || self.locs[s].is_none() {
// VM guarantees that each pair of locations are both Some or None.
return None
}
- Some((self.locs.get(s).unwrap(), self.locs.get(e).unwrap()))
+ // Both slots are `Some` here (see the guarantee noted above), so the
+ // unwraps cannot fail.
+ Some((self.locs[s].unwrap(), self.locs[e].unwrap()))
}
/// Returns the matched string for the capture group `i`.
let re = Regex::new(r"(^|[^$]|\b)\$(\w+)").unwrap();
let text = re.replace_all(text, |refs: &Captures| -> String {
let (pre, name) = (refs.at(1), refs.at(2));
- format_strbuf!("{}{}",
- pre,
- match from_str::<uint>(name.as_slice()) {
- None => self.name(name).to_strbuf(),
- Some(i) => self.at(i).to_strbuf(),
+ format!("{}{}", pre,
+ match from_str::<uint>(name.as_slice()) {
+ None => self.name(name).to_string(),
+ Some(i) => self.at(i).to_string(),
})
});
let re = Regex::new(r"\$\$").unwrap();
}
}
-impl<'t> Container for Captures<'t> {
+impl<'t> Collection for Captures<'t> {
/// Returns the number of captured groups.
#[inline]
fn len(&self) -> uint {
}
/// An iterator that yields all non-overlapping capture groups matching a
-/// particular regular expression. The iterator stops when no more matches can
-/// be found.
+/// particular regular expression.
+///
+/// The iterator stops when no more matches can be found.
///
/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
/// of the matched string.
if !has_match(&caps) {
return None
} else {
- (caps.get(0).unwrap(), caps.get(1).unwrap())
+ (caps[0].unwrap(), caps[1].unwrap())
};
// Don't accept empty matches immediately following a match.
if !has_match(&caps) {
return None
} else {
- (caps.get(0).unwrap(), caps.get(1).unwrap())
+ (caps[0].unwrap(), caps[1].unwrap())
};
// Don't accept empty matches immediately following a match.
+// Runs `re` against `input` and returns the capture locations, dispatching on
+// the representation of the regex.
+//
+// `which`, `s` and `e` are forwarded unchanged to the matcher; `s` and `e`
+// presumably bound the searched region of `input` -- confirm against
+// `vm::run`.
fn exec_slice(re: &Regex, which: MatchKind,
input: &str, s: uint, e: uint) -> CaptureLocs {
- match re.p {
- Dynamic(ref prog) => vm::run(which, prog, input, s, e),
- Native(exec) => exec(which, input, s, e),
+ match *re {
+ // Dynamic: the compiled program is run by the VM.
+ Dynamic(ExDynamic { ref prog, .. }) => vm::run(which, prog, input, s, e),
+ // Native: `prog` is a function pointer and is called directly.
+ Native(ExNative { prog, .. }) => prog(which, input, s, e),
}
}
+// True when `caps` has at least two slots and the first two are both `Some`.
+// Callers such as `find` read those two slots as the start and end of the
+// match.
#[inline]
fn has_match(caps: &CaptureLocs) -> bool {
- caps.len() >= 2 && caps.get(0).is_some() && caps.get(1).is_some()
+ // The length test comes first so the indexing that follows is in range.
+ caps.len() >= 2 && caps[0].is_some() && caps[1].is_some()
}