1 // Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 * Support for matching file paths against Unix shell style patterns.
14 * The `glob` and `glob_with` functions, in concert with the `Paths`
15 * type, allow querying the filesystem for all files that match a particular
16 * pattern - just like the libc `glob` function (for an example see the `glob`
17 * documentation). The methods on the `Pattern` type provide functionality
18 * for checking if individual paths match a particular pattern - in a similar
19 * manner to the libc `fnmatch` function
21 * For consistency across platforms, and for Windows support, this module
22 * is implemented entirely in Rust rather than deferring to the libc
23 * `glob`/`fnmatch` functions.
26 #[crate_id = "glob#0.10-pre"];
27 #[crate_type = "rlib"];
28 #[crate_type = "dylib"];
29 #[license = "MIT/ASL2"];
30 #[doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
31 html_favicon_url = "http://www.rust-lang.org/favicon.ico",
32 html_root_url = "http://static.rust-lang.org/doc/master")];
35 use std::{cmp, os, path};
37 use std::path::is_sep;
40 * An iterator that yields Paths from the filesystem that match a particular
41 * pattern - see the `glob` function for more details.
45 priv dir_patterns: ~[Pattern],
46 priv options: MatchOptions,
47 priv todo: ~[(Path,uint)]
51 /// Return an iterator that produces all the Paths that match the given pattern,
52 /// which may be absolute or relative to the current working directory.
54 /// is method uses the default match options and is equivalent to calling
55 /// `glob_with(pattern, MatchOptions::new())`. Use `glob_with` directly if you
56 /// want to use non-default match options.
60 /// Consider a directory `/media/pictures` containing only the files `kittens.jpg`,
61 /// `puppies.jpg` and `hamsters.gif`:
66 /// for path in glob("/media/pictures/*.jpg") {
67 /// println!("{}", path.display());
71 /// The above code will print:
74 /// /media/pictures/kittens.jpg
75 /// /media/pictures/puppies.jpg
78 pub fn glob(pattern: &str) -> Paths {
79 glob_with(pattern, MatchOptions::new())
83 * Return an iterator that produces all the Paths that match the given pattern,
84 * which may be absolute or relative to the current working directory.
86 * This function accepts Unix shell style patterns as described by `Pattern::new(..)`.
87 * The options given are passed through unchanged to `Pattern::matches_with(..)` with
88 * the exception that `require_literal_separator` is always set to `true` regardless of the
89 * value passed to this function.
91 * Paths are yielded in alphabetical order, as absolute paths.
93 pub fn glob_with(pattern: &str, options: MatchOptions) -> Paths {
95 fn check_windows_verbatim(p: &Path) -> bool { path::windows::is_verbatim(p) }
97 fn check_windows_verbatim(_: &Path) -> bool { false }
99 // calculate root this way to handle volume-relative Windows paths correctly
100 let mut root = os::getcwd();
101 let pat_root = Path::new(pattern).root_path();
102 if pat_root.is_some() {
103 if check_windows_verbatim(pat_root.get_ref()) {
104 // FIXME: How do we want to handle verbatim paths? I'm inclined to return nothing,
105 // since we can't very well find all UNC shares with a 1-letter server name.
106 return Paths { root: root, dir_patterns: ~[], options: options, todo: ~[] };
108 root.push(pat_root.get_ref());
111 let root_len = pat_root.map_or(0u, |p| p.as_vec().len());
112 let dir_patterns = pattern.slice_from(cmp::min(root_len, pattern.len()))
113 .split_terminator(is_sep).map(|s| Pattern::new(s)).to_owned_vec();
115 let todo = list_dir_sorted(&root).move_iter().map(|x|(x,0u)).to_owned_vec();
119 dir_patterns: dir_patterns,
125 impl Iterator<Path> for Paths {
127 fn next(&mut self) -> Option<Path> {
129 if self.dir_patterns.is_empty() || self.todo.is_empty() {
133 let (path,idx) = self.todo.pop().unwrap();
134 let ref pattern = self.dir_patterns[idx];
136 if pattern.matches_with(match path.filename_str() {
137 // this ugly match needs to go here to avoid a borrowck error
139 // FIXME (#9639): How do we handle non-utf8 filenames? Ignore them for now
140 // Ideally we'd still match them against a *
145 if idx == self.dir_patterns.len() - 1 {
146 // it is not possible for a pattern to match a directory *AND* its children
147 // so we don't need to check the children
150 self.todo.extend(&mut list_dir_sorted(&path).move_iter().map(|x|(x,idx+1)));
158 fn list_dir_sorted(path: &Path) -> ~[Path] {
159 match fs::readdir(path) {
160 Ok(mut children) => {
161 children.sort_by(|p1, p2| p2.filename().cmp(&p1.filename()));
169 * A compiled Unix shell style pattern.
171 #[deriving(Clone, Eq, TotalEq, Ord, TotalOrd, Hash, Default)]
173 priv tokens: ~[PatternToken]
176 #[deriving(Clone, Eq, TotalEq, Ord, TotalOrd, Hash)]
181 AnyWithin(~[CharSpecifier]),
182 AnyExcept(~[CharSpecifier])
185 #[deriving(Clone, Eq, TotalEq, Ord, TotalOrd, Hash)]
188 CharRange(char, char)
194 SubPatternDoesntMatch,
195 EntirePatternDoesntMatch
201 * This function compiles Unix shell style patterns: `?` matches any single
202 * character, `*` matches any (possibly empty) sequence of characters and
203 * `[...]` matches any character inside the brackets, unless the first
204 * character is `!` in which case it matches any character except those
205 * between the `!` and the `]`. Character sequences can also specify ranges
206 * of characters, as ordered by Unicode, so e.g. `[0-9]` specifies any
207 * character between 0 and 9 inclusive.
209 * The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets
210 * (e.g. `[?]`). When a `]` occurs immediately following `[` or `[!` then
211 * it is interpreted as being part of, rather then ending, the character
212 * set, so `]` and NOT `]` can be matched by `[]]` and `[!]]` respectively.
213 * The `-` character can be specified inside a character sequence pattern by
214 * placing it at the start or the end, e.g. `[abc-]`.
216 * When a `[` does not have a closing `]` before the end of the string then
217 * the `[` will be treated literally.
219 pub fn new(pattern: &str) -> Pattern {
221 let chars = pattern.chars().to_owned_vec();
222 let mut tokens = ~[];
225 while i < chars.len() {
228 tokens.push(AnyChar);
232 // *, **, ***, ****, ... are all equivalent
233 while i < chars.len() && chars[i] == '*' {
236 tokens.push(AnySequence);
240 if i <= chars.len() - 4 && chars[i + 1] == '!' {
241 match chars.slice_from(i + 3).position_elem(&']') {
244 let chars = chars.slice(i + 2, i + 3 + j);
245 let cs = parse_char_specifiers(chars);
246 tokens.push(AnyExcept(cs));
252 else if i <= chars.len() - 3 && chars[i + 1] != '!' {
253 match chars.slice_from(i + 2).position_elem(&']') {
256 let cs = parse_char_specifiers(chars.slice(i + 1, i + 2 + j));
257 tokens.push(AnyWithin(cs));
264 // if we get here then this is not a valid range pattern
265 tokens.push(Char('['));
269 tokens.push(Char(c));
275 Pattern { tokens: tokens }
279 * Escape metacharacters within the given string by surrounding them in
280 * brackets. The resulting string will, when compiled into a `Pattern`,
281 * match the input string and nothing else.
283 pub fn escape(s: &str) -> ~str {
284 let mut escaped = ~"";
287 // note that ! does not need escaping because it is only special inside brackets
288 '?' | '*' | '[' | ']' => {
289 escaped.push_char('[');
290 escaped.push_char(c);
291 escaped.push_char(']');
294 escaped.push_char(c);
302 * Return if the given `str` matches this `Pattern` using the default
303 * match options (i.e. `MatchOptions::new()`).
310 * assert!(Pattern::new("c?t").matches("cat"));
311 * assert!(Pattern::new("k[!e]tteh").matches("kitteh"));
312 * assert!(Pattern::new("d*g").matches("doog"));
315 pub fn matches(&self, str: &str) -> bool {
316 self.matches_with(str, MatchOptions::new())
320 * Return if the given `Path`, when converted to a `str`, matches this `Pattern`
321 * using the default match options (i.e. `MatchOptions::new()`).
323 pub fn matches_path(&self, path: &Path) -> bool {
324 // FIXME (#9639): This needs to handle non-utf8 paths
325 path.as_str().map_or(false, |s| {
331 * Return if the given `str` matches this `Pattern` using the specified match options.
333 pub fn matches_with(&self, str: &str, options: MatchOptions) -> bool {
334 self.matches_from(None, str, 0, options) == Match
338 * Return if the given `Path`, when converted to a `str`, matches this `Pattern`
339 * using the specified match options.
341 pub fn matches_path_with(&self, path: &Path, options: MatchOptions) -> bool {
342 // FIXME (#9639): This needs to handle non-utf8 paths
343 path.as_str().map_or(false, |s| {
344 self.matches_with(s, options)
348 fn matches_from(&self,
349 prev_char: Option<char>,
352 options: MatchOptions) -> MatchResult {
354 let prev_char = Cell::new(prev_char);
356 let require_literal = |c| {
357 (options.require_literal_separator && is_sep(c)) ||
358 (options.require_literal_leading_dot && c == '.'
359 && is_sep(prev_char.get().unwrap_or('/')))
362 for (ti, token) in self.tokens.slice_from(i).iter().enumerate() {
366 match self.matches_from(prev_char.get(), file, i + ti + 1, options) {
367 SubPatternDoesntMatch => (), // keep trying
372 return EntirePatternDoesntMatch;
375 let (some_c, next) = file.slice_shift_char();
376 if require_literal(some_c.unwrap()) {
377 return SubPatternDoesntMatch;
379 prev_char.set(some_c);
385 return EntirePatternDoesntMatch;
388 let (some_c, next) = file.slice_shift_char();
389 let c = some_c.unwrap();
390 let matches = match *token {
394 AnyWithin(ref specifiers) => {
395 !require_literal(c) && in_char_specifiers(*specifiers, c, options)
397 AnyExcept(ref specifiers) => {
398 !require_literal(c) && !in_char_specifiers(*specifiers, c, options)
401 chars_eq(c, c2, options.case_sensitive)
408 return SubPatternDoesntMatch;
410 prev_char.set(some_c);
419 SubPatternDoesntMatch
425 fn parse_char_specifiers(s: &[char]) -> ~[CharSpecifier] {
429 if i + 3 <= s.len() && s[i + 1] == '-' {
430 cs.push(CharRange(s[i], s[i + 2]));
433 cs.push(SingleChar(s[i]));
440 fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool {
442 for &specifier in specifiers.iter() {
445 if chars_eq(c, sc, options.case_sensitive) {
449 CharRange(start, end) => {
451 // FIXME: work with non-ascii chars properly (issue #1347)
452 if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() {
454 let start = start.to_ascii().to_lower();
455 let end = end.to_ascii().to_lower();
457 let start_up = start.to_upper();
458 let end_up = end.to_upper();
460 // only allow case insensitive matching when
461 // both start and end are within a-z or A-Z
462 if start != start_up && end != end_up {
463 let start = start.to_char();
464 let end = end.to_char();
465 let c = c.to_ascii().to_lower().to_char();
466 if c >= start && c <= end {
472 if c >= start && c <= end {
482 /// A helper function to determine if two chars are (possibly case-insensitively) equal.
483 fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
484 if cfg!(windows) && path::windows::is_sep(a) && path::windows::is_sep(b) {
486 } else if !case_sensitive && a.is_ascii() && b.is_ascii() {
487 // FIXME: work with non-ascii chars properly (issue #1347)
488 a.to_ascii().eq_ignore_case(b.to_ascii())
495 * Configuration options to modify the behaviour of `Pattern::matches_with(..)`
497 #[deriving(Clone, Eq, TotalEq, Ord, TotalOrd, Hash, Default)]
498 pub struct MatchOptions {
501 * Whether or not patterns should be matched in a case-sensitive manner. This
502 * currently only considers upper/lower case relationships between ASCII characters,
503 * but in future this might be extended to work with Unicode.
505 priv case_sensitive: bool,
508 * If this is true then path-component separator characters (e.g. `/` on Posix)
509 * must be matched by a literal `/`, rather than by `*` or `?` or `[...]`
511 priv require_literal_separator: bool,
514 * If this is true then paths that contain components that start with a `.` will
515 * not match unless the `.` appears literally in the pattern: `*`, `?` or `[...]`
516 * will not match. This is useful because such files are conventionally considered
517 * hidden on Unix systems and it might be desirable to skip them when listing files.
519 priv require_literal_leading_dot: bool
525 * Constructs a new `MatchOptions` with default field values. This is used
526 * when calling functions that do not take an explicit `MatchOptions` parameter.
528 * This function always returns this value:
532 * case_sensitive: true,
533 * require_literal_separator: false.
534 * require_literal_leading_dot: false
538 pub fn new() -> MatchOptions {
540 case_sensitive: true,
541 require_literal_separator: false,
542 require_literal_leading_dot: false
551 use super::{glob, Pattern, MatchOptions};
554 fn test_absolute_pattern() {
555 // assume that the filesystem is not empty!
556 assert!(glob("/*").next().is_some());
557 assert!(glob("//").next().is_none());
559 // check windows absolute paths with host/device components
560 let root_with_device = os::getcwd().root_path().unwrap().join("*");
561 // FIXME (#9639): This needs to handle non-utf8 paths
562 assert!(glob(root_with_device.as_str().unwrap()).next().is_some());
566 fn test_wildcard_optimizations() {
567 assert!(Pattern::new("a*b").matches("a___b"));
568 assert!(Pattern::new("a**b").matches("a___b"));
569 assert!(Pattern::new("a***b").matches("a___b"));
570 assert!(Pattern::new("a*b*c").matches("abc"));
571 assert!(!Pattern::new("a*b*c").matches("abcd"));
572 assert!(Pattern::new("a*b*c").matches("a_b_c"));
573 assert!(Pattern::new("a*b*c").matches("a___b___c"));
574 assert!(Pattern::new("abc*abc*abc").matches("abcabcabcabcabcabcabc"));
575 assert!(!Pattern::new("abc*abc*abc").matches("abcabcabcabcabcabcabca"));
576 assert!(Pattern::new("a*a*a*a*a*a*a*a*a").matches("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"));
577 assert!(Pattern::new("a*b[xyz]c*d").matches("abxcdbxcddd"));
581 fn test_lots_of_files() {
582 // this is a good test because it touches lots of differently named files
583 glob("/*/*/*/*").skip(10000).next();
587 fn test_range_pattern() {
589 let pat = Pattern::new("a[0-9]b");
590 for i in range(0, 10) {
591 assert!(pat.matches(format!("a{}b", i)));
593 assert!(!pat.matches("a_b"));
595 let pat = Pattern::new("a[!0-9]b");
596 for i in range(0, 10) {
597 assert!(!pat.matches(format!("a{}b", i)));
599 assert!(pat.matches("a_b"));
601 let pats = ["[a-z123]", "[1a-z23]", "[123a-z]"];
602 for &p in pats.iter() {
603 let pat = Pattern::new(p);
604 for c in "abcdefghijklmnopqrstuvwxyz".chars() {
605 assert!(pat.matches(c.to_str()));
607 for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars() {
608 let options = MatchOptions {case_sensitive: false, .. MatchOptions::new()};
609 assert!(pat.matches_with(c.to_str(), options));
611 assert!(pat.matches("1"));
612 assert!(pat.matches("2"));
613 assert!(pat.matches("3"));
616 let pats = ["[abc-]", "[-abc]", "[a-c-]"];
617 for &p in pats.iter() {
618 let pat = Pattern::new(p);
619 assert!(pat.matches("a"));
620 assert!(pat.matches("b"));
621 assert!(pat.matches("c"));
622 assert!(pat.matches("-"));
623 assert!(!pat.matches("d"));
626 let pat = Pattern::new("[2-1]");
627 assert!(!pat.matches("1"));
628 assert!(!pat.matches("2"));
630 assert!(Pattern::new("[-]").matches("-"));
631 assert!(!Pattern::new("[!-]").matches("-"));
635 fn test_unclosed_bracket() {
636 // unclosed `[` should be treated literally
637 assert!(Pattern::new("abc[def").matches("abc[def"));
638 assert!(Pattern::new("abc[!def").matches("abc[!def"));
639 assert!(Pattern::new("abc[").matches("abc["));
640 assert!(Pattern::new("abc[!").matches("abc[!"));
641 assert!(Pattern::new("abc[d").matches("abc[d"));
642 assert!(Pattern::new("abc[!d").matches("abc[!d"));
643 assert!(Pattern::new("abc[]").matches("abc[]"));
644 assert!(Pattern::new("abc[!]").matches("abc[!]"));
648 fn test_pattern_matches() {
649 let txt_pat = Pattern::new("*hello.txt");
650 assert!(txt_pat.matches("hello.txt"));
651 assert!(txt_pat.matches("gareth_says_hello.txt"));
652 assert!(txt_pat.matches("some/path/to/hello.txt"));
653 assert!(txt_pat.matches("some\\path\\to\\hello.txt"));
654 assert!(txt_pat.matches("/an/absolute/path/to/hello.txt"));
655 assert!(!txt_pat.matches("hello.txt-and-then-some"));
656 assert!(!txt_pat.matches("goodbye.txt"));
658 let dir_pat = Pattern::new("*some/path/to/hello.txt");
659 assert!(dir_pat.matches("some/path/to/hello.txt"));
660 assert!(dir_pat.matches("a/bigger/some/path/to/hello.txt"));
661 assert!(!dir_pat.matches("some/path/to/hello.txt-and-then-some"));
662 assert!(!dir_pat.matches("some/other/path/to/hello.txt"));
666 fn test_pattern_escape() {
667 let s = "_[_]_?_*_!_";
668 assert_eq!(Pattern::escape(s), ~"_[[]_[]]_[?]_[*]_!_");
669 assert!(Pattern::new(Pattern::escape(s)).matches(s));
673 fn test_pattern_matches_case_insensitive() {
675 let pat = Pattern::new("aBcDeFg");
676 let options = MatchOptions {
677 case_sensitive: false,
678 require_literal_separator: false,
679 require_literal_leading_dot: false
682 assert!(pat.matches_with("aBcDeFg", options));
683 assert!(pat.matches_with("abcdefg", options));
684 assert!(pat.matches_with("ABCDEFG", options));
685 assert!(pat.matches_with("AbCdEfG", options));
689 fn test_pattern_matches_case_insensitive_range() {
691 let pat_within = Pattern::new("[a]");
692 let pat_except = Pattern::new("[!a]");
694 let options_case_insensitive = MatchOptions {
695 case_sensitive: false,
696 require_literal_separator: false,
697 require_literal_leading_dot: false
699 let options_case_sensitive = MatchOptions {
700 case_sensitive: true,
701 require_literal_separator: false,
702 require_literal_leading_dot: false
705 assert!(pat_within.matches_with("a", options_case_insensitive));
706 assert!(pat_within.matches_with("A", options_case_insensitive));
707 assert!(!pat_within.matches_with("A", options_case_sensitive));
709 assert!(!pat_except.matches_with("a", options_case_insensitive));
710 assert!(!pat_except.matches_with("A", options_case_insensitive));
711 assert!(pat_except.matches_with("A", options_case_sensitive));
715 fn test_pattern_matches_require_literal_separator() {
717 let options_require_literal = MatchOptions {
718 case_sensitive: true,
719 require_literal_separator: true,
720 require_literal_leading_dot: false
722 let options_not_require_literal = MatchOptions {
723 case_sensitive: true,
724 require_literal_separator: false,
725 require_literal_leading_dot: false
728 assert!(Pattern::new("abc/def").matches_with("abc/def", options_require_literal));
729 assert!(!Pattern::new("abc?def").matches_with("abc/def", options_require_literal));
730 assert!(!Pattern::new("abc*def").matches_with("abc/def", options_require_literal));
731 assert!(!Pattern::new("abc[/]def").matches_with("abc/def", options_require_literal));
733 assert!(Pattern::new("abc/def").matches_with("abc/def", options_not_require_literal));
734 assert!(Pattern::new("abc?def").matches_with("abc/def", options_not_require_literal));
735 assert!(Pattern::new("abc*def").matches_with("abc/def", options_not_require_literal));
736 assert!(Pattern::new("abc[/]def").matches_with("abc/def", options_not_require_literal));
740 fn test_pattern_matches_require_literal_leading_dot() {
742 let options_require_literal_leading_dot = MatchOptions {
743 case_sensitive: true,
744 require_literal_separator: false,
745 require_literal_leading_dot: true
747 let options_not_require_literal_leading_dot = MatchOptions {
748 case_sensitive: true,
749 require_literal_separator: false,
750 require_literal_leading_dot: false
753 let f = |options| Pattern::new("*.txt").matches_with(".hello.txt", options);
754 assert!(f(options_not_require_literal_leading_dot));
755 assert!(!f(options_require_literal_leading_dot));
757 let f = |options| Pattern::new(".*.*").matches_with(".hello.txt", options);
758 assert!(f(options_not_require_literal_leading_dot));
759 assert!(f(options_require_literal_leading_dot));
761 let f = |options| Pattern::new("aaa/bbb/*").matches_with("aaa/bbb/.ccc", options);
762 assert!(f(options_not_require_literal_leading_dot));
763 assert!(!f(options_require_literal_leading_dot));
765 let f = |options| Pattern::new("aaa/bbb/*").matches_with("aaa/bbb/c.c.c.", options);
766 assert!(f(options_not_require_literal_leading_dot));
767 assert!(f(options_require_literal_leading_dot));
769 let f = |options| Pattern::new("aaa/bbb/.*").matches_with("aaa/bbb/.ccc", options);
770 assert!(f(options_not_require_literal_leading_dot));
771 assert!(f(options_require_literal_leading_dot));
773 let f = |options| Pattern::new("aaa/?bbb").matches_with("aaa/.bbb", options);
774 assert!(f(options_not_require_literal_leading_dot));
775 assert!(!f(options_require_literal_leading_dot));
777 let f = |options| Pattern::new("aaa/[.]bbb").matches_with("aaa/.bbb", options);
778 assert!(f(options_not_require_literal_leading_dot));
779 assert!(!f(options_require_literal_leading_dot));
783 fn test_matches_path() {
784 // on windows, (Path::new("a/b").as_str().unwrap() == "a\\b"), so this
785 // tests that / and \ are considered equivalent on windows
786 assert!(Pattern::new("a/b").matches_path(&Path::new("a/b")));