1 // Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 * Support for matching file paths against Unix shell style patterns.
14 * The `glob` and `glob_with` functions, in concert with the `Paths`
15 * type, allow querying the filesystem for all files that match a particular
16 * pattern - just like the libc `glob` function (for an example see the `glob`
17 * documentation). The methods on the `Pattern` type provide functionality
18 * for checking if individual paths match a particular pattern - in a similar
19 * manner to the libc `fnmatch` function.
21 * For consistency across platforms, and for Windows support, this module
22 * is implemented entirely in Rust rather than deferring to the libc
23 * `glob`/`fnmatch` functions.
26 #![crate_id = "glob#0.10"]
27 #![crate_type = "rlib"]
28 #![crate_type = "dylib"]
29 #![license = "MIT/ASL2"]
30 #![doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
31 html_favicon_url = "http://www.rust-lang.org/favicon.ico",
32 html_root_url = "http://static.rust-lang.org/doc/master")]
35 use std::{cmp, os, path};
37 use std::path::is_sep;
40 * An iterator that yields Paths from the filesystem that match a particular
41 * pattern - see the `glob` function for more details.
45 priv dir_patterns: Vec<Pattern> ,
46 priv options: MatchOptions,
47 priv todo: Vec<(Path,uint)> }
50 /// Return an iterator that produces all the Paths that match the given pattern,
51 /// which may be absolute or relative to the current working directory.
53 /// This function uses the default match options and is equivalent to calling
54 /// `glob_with(pattern, MatchOptions::new())`. Use `glob_with` directly if you
55 /// want to use non-default match options.
59 /// Consider a directory `/media/pictures` containing only the files `kittens.jpg`,
60 /// `puppies.jpg` and `hamsters.gif`:
65 /// for path in glob("/media/pictures/*.jpg") {
66 /// println!("{}", path.display());
70 /// The above code will print:
73 /// /media/pictures/kittens.jpg
74 /// /media/pictures/puppies.jpg
77 pub fn glob(pattern: &str) -> Paths {
78 glob_with(pattern, MatchOptions::new())
82 * Return an iterator that produces all the Paths that match the given pattern,
83 * which may be absolute or relative to the current working directory.
85 * This function accepts Unix shell style patterns as described by `Pattern::new(..)`.
86 * The options given are passed through unchanged to `Pattern::matches_with(..)` with
87 * the exception that `require_literal_separator` is always set to `true` regardless of the
88 * value passed to this function.
90 * Paths are yielded in alphabetical order, as absolute paths.
92 pub fn glob_with(pattern: &str, options: MatchOptions) -> Paths {
94 fn check_windows_verbatim(p: &Path) -> bool { path::windows::is_verbatim(p) }
96 fn check_windows_verbatim(_: &Path) -> bool { false }
98 // calculate root this way to handle volume-relative Windows paths correctly
99 let mut root = os::getcwd();
100 let pat_root = Path::new(pattern).root_path();
101 if pat_root.is_some() {
102 if check_windows_verbatim(pat_root.get_ref()) {
103 // FIXME: How do we want to handle verbatim paths? I'm inclined to return nothing,
104 // since we can't very well find all UNC shares with a 1-letter server name.
107 dir_patterns: Vec::new(),
112 root.push(pat_root.get_ref());
115 let root_len = pat_root.map_or(0u, |p| p.as_vec().len());
116 let dir_patterns = pattern.slice_from(cmp::min(root_len, pattern.len()))
117 .split_terminator(is_sep)
118 .map(|s| Pattern::new(s))
121 let todo = list_dir_sorted(&root).move_iter().map(|x|(x,0u)).collect();
125 dir_patterns: dir_patterns,
131 impl Iterator<Path> for Paths {
133 fn next(&mut self) -> Option<Path> {
135 if self.dir_patterns.is_empty() || self.todo.is_empty() {
139 let (path,idx) = self.todo.pop().unwrap();
140 let ref pattern = *self.dir_patterns.get(idx);
142 if pattern.matches_with(match path.filename_str() {
143 // this ugly match needs to go here to avoid a borrowck error
145 // FIXME (#9639): How do we handle non-utf8 filenames? Ignore them for now
146 // Ideally we'd still match them against a *
151 if idx == self.dir_patterns.len() - 1 {
152 // it is not possible for a pattern to match a directory *AND* its children
153 // so we don't need to check the children
156 self.todo.extend(list_dir_sorted(&path).move_iter().map(|x|(x,idx+1)));
164 fn list_dir_sorted(path: &Path) -> Vec<Path> {
165 match fs::readdir(path) {
166 Ok(mut children) => {
167 children.sort_by(|p1, p2| p2.filename().cmp(&p1.filename()));
168 children.move_iter().collect()
170 Err(..) => Vec::new()
175 * A compiled Unix shell style pattern.
177 #[deriving(Clone, Eq, TotalEq, Ord, TotalOrd, Hash, Default)]
179 priv tokens: Vec<PatternToken> }
181 #[deriving(Clone, Eq, TotalEq, Ord, TotalOrd, Hash)]
186 AnyWithin(Vec<CharSpecifier> ),
187 AnyExcept(Vec<CharSpecifier> )
190 #[deriving(Clone, Eq, TotalEq, Ord, TotalOrd, Hash)]
193 CharRange(char, char)
199 SubPatternDoesntMatch,
200 EntirePatternDoesntMatch
206 * This function compiles Unix shell style patterns: `?` matches any single
207 * character, `*` matches any (possibly empty) sequence of characters and
208 * `[...]` matches any character inside the brackets, unless the first
209 * character is `!` in which case it matches any character except those
210 * between the `!` and the `]`. Character sequences can also specify ranges
211 * of characters, as ordered by Unicode, so e.g. `[0-9]` specifies any
212 * character between 0 and 9 inclusive.
214 * The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets
215 * (e.g. `[?]`). When a `]` occurs immediately following `[` or `[!` then
216 * it is interpreted as being part of, rather than ending, the character
217 * set, so `]` and NOT `]` can be matched by `[]]` and `[!]]` respectively.
218 * The `-` character can be specified inside a character sequence pattern by
219 * placing it at the start or the end, e.g. `[abc-]`.
221 * When a `[` does not have a closing `]` before the end of the string then
222 * the `[` will be treated literally.
224 pub fn new(pattern: &str) -> Pattern {
226 let chars = pattern.chars().collect::<~[_]>();
227 let mut tokens = Vec::new();
230 while i < chars.len() {
233 tokens.push(AnyChar);
237 // *, **, ***, ****, ... are all equivalent
238 while i < chars.len() && chars[i] == '*' {
241 tokens.push(AnySequence);
245 if i <= chars.len() - 4 && chars[i + 1] == '!' {
246 match chars.slice_from(i + 3).position_elem(&']') {
249 let chars = chars.slice(i + 2, i + 3 + j);
250 let cs = parse_char_specifiers(chars);
251 tokens.push(AnyExcept(cs));
257 else if i <= chars.len() - 3 && chars[i + 1] != '!' {
258 match chars.slice_from(i + 2).position_elem(&']') {
261 let cs = parse_char_specifiers(chars.slice(i + 1, i + 2 + j));
262 tokens.push(AnyWithin(cs));
269 // if we get here then this is not a valid range pattern
270 tokens.push(Char('['));
274 tokens.push(Char(c));
280 Pattern { tokens: tokens }
284 * Escape metacharacters within the given string by surrounding them in
285 * brackets. The resulting string will, when compiled into a `Pattern`,
286 * match the input string and nothing else.
288 pub fn escape(s: &str) -> ~str {
289 let mut escaped = ~"";
292 // note that ! does not need escaping because it is only special inside brackets
293 '?' | '*' | '[' | ']' => {
294 escaped.push_char('[');
295 escaped.push_char(c);
296 escaped.push_char(']');
299 escaped.push_char(c);
307 * Return if the given `str` matches this `Pattern` using the default
308 * match options (i.e. `MatchOptions::new()`).
315 * assert!(Pattern::new("c?t").matches("cat"));
316 * assert!(Pattern::new("k[!e]tteh").matches("kitteh"));
317 * assert!(Pattern::new("d*g").matches("doog"));
320 pub fn matches(&self, str: &str) -> bool {
321 self.matches_with(str, MatchOptions::new())
325 * Return if the given `Path`, when converted to a `str`, matches this `Pattern`
326 * using the default match options (i.e. `MatchOptions::new()`).
328 pub fn matches_path(&self, path: &Path) -> bool {
329 // FIXME (#9639): This needs to handle non-utf8 paths
330 path.as_str().map_or(false, |s| {
336 * Return if the given `str` matches this `Pattern` using the specified match options.
338 pub fn matches_with(&self, str: &str, options: MatchOptions) -> bool {
339 self.matches_from(None, str, 0, options) == Match
343 * Return if the given `Path`, when converted to a `str`, matches this `Pattern`
344 * using the specified match options.
346 pub fn matches_path_with(&self, path: &Path, options: MatchOptions) -> bool {
347 // FIXME (#9639): This needs to handle non-utf8 paths
348 path.as_str().map_or(false, |s| {
349 self.matches_with(s, options)
353 fn matches_from(&self,
354 prev_char: Option<char>,
357 options: MatchOptions) -> MatchResult {
359 let prev_char = Cell::new(prev_char);
361 let require_literal = |c| {
362 (options.require_literal_separator && is_sep(c)) ||
363 (options.require_literal_leading_dot && c == '.'
364 && is_sep(prev_char.get().unwrap_or('/')))
367 for (ti, token) in self.tokens.slice_from(i).iter().enumerate() {
371 match self.matches_from(prev_char.get(), file, i + ti + 1, options) {
372 SubPatternDoesntMatch => (), // keep trying
377 return EntirePatternDoesntMatch;
380 let (some_c, next) = file.slice_shift_char();
381 if require_literal(some_c.unwrap()) {
382 return SubPatternDoesntMatch;
384 prev_char.set(some_c);
390 return EntirePatternDoesntMatch;
393 let (some_c, next) = file.slice_shift_char();
394 let c = some_c.unwrap();
395 let matches = match *token {
399 AnyWithin(ref specifiers) => {
400 !require_literal(c) &&
401 in_char_specifiers(specifiers.as_slice(),
405 AnyExcept(ref specifiers) => {
406 !require_literal(c) &&
407 !in_char_specifiers(specifiers.as_slice(),
412 chars_eq(c, c2, options.case_sensitive)
419 return SubPatternDoesntMatch;
421 prev_char.set(some_c);
430 SubPatternDoesntMatch
436 fn parse_char_specifiers(s: &[char]) -> Vec<CharSpecifier> {
437 let mut cs = Vec::new();
440 if i + 3 <= s.len() && s[i + 1] == '-' {
441 cs.push(CharRange(s[i], s[i + 2]));
444 cs.push(SingleChar(s[i]));
451 fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool {
453 for &specifier in specifiers.iter() {
456 if chars_eq(c, sc, options.case_sensitive) {
460 CharRange(start, end) => {
462 // FIXME: work with non-ascii chars properly (issue #1347)
463 if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() {
465 let start = start.to_ascii().to_lower();
466 let end = end.to_ascii().to_lower();
468 let start_up = start.to_upper();
469 let end_up = end.to_upper();
471 // only allow case insensitive matching when
472 // both start and end are within a-z or A-Z
473 if start != start_up && end != end_up {
474 let start = start.to_char();
475 let end = end.to_char();
476 let c = c.to_ascii().to_lower().to_char();
477 if c >= start && c <= end {
483 if c >= start && c <= end {
493 /// A helper function to determine if two chars are (possibly case-insensitively) equal.
494 fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
495 if cfg!(windows) && path::windows::is_sep(a) && path::windows::is_sep(b) {
497 } else if !case_sensitive && a.is_ascii() && b.is_ascii() {
498 // FIXME: work with non-ascii chars properly (issue #1347)
499 a.to_ascii().eq_ignore_case(b.to_ascii())
506 * Configuration options to modify the behaviour of `Pattern::matches_with(..)`
508 #[deriving(Clone, Eq, TotalEq, Ord, TotalOrd, Hash, Default)]
509 pub struct MatchOptions {
512 * Whether or not patterns should be matched in a case-sensitive manner. This
513 * currently only considers upper/lower case relationships between ASCII characters,
514 * but in future this might be extended to work with Unicode.
516 priv case_sensitive: bool,
519 * If this is true then path-component separator characters (e.g. `/` on Posix)
520 * must be matched by a literal `/`, rather than by `*` or `?` or `[...]`
522 priv require_literal_separator: bool,
525 * If this is true then paths that contain components that start with a `.` will
526 * not match unless the `.` appears literally in the pattern: `*`, `?` or `[...]`
527 * will not match. This is useful because such files are conventionally considered
528 * hidden on Unix systems and it might be desirable to skip them when listing files.
530 priv require_literal_leading_dot: bool
536 * Constructs a new `MatchOptions` with default field values. This is used
537 * when calling functions that do not take an explicit `MatchOptions` parameter.
539 * This function always returns this value:
543 * case_sensitive: true,
544 * require_literal_separator: false,
545 * require_literal_leading_dot: false
549 pub fn new() -> MatchOptions {
551 case_sensitive: true,
552 require_literal_separator: false,
553 require_literal_leading_dot: false
562 use super::{glob, Pattern, MatchOptions};
565 fn test_absolute_pattern() {
566 // assume that the filesystem is not empty!
567 assert!(glob("/*").next().is_some());
568 assert!(glob("//").next().is_none());
570 // check windows absolute paths with host/device components
571 let root_with_device = os::getcwd().root_path().unwrap().join("*");
572 // FIXME (#9639): This needs to handle non-utf8 paths
573 assert!(glob(root_with_device.as_str().unwrap()).next().is_some());
577 fn test_wildcard_optimizations() {
578 assert!(Pattern::new("a*b").matches("a___b"));
579 assert!(Pattern::new("a**b").matches("a___b"));
580 assert!(Pattern::new("a***b").matches("a___b"));
581 assert!(Pattern::new("a*b*c").matches("abc"));
582 assert!(!Pattern::new("a*b*c").matches("abcd"));
583 assert!(Pattern::new("a*b*c").matches("a_b_c"));
584 assert!(Pattern::new("a*b*c").matches("a___b___c"));
585 assert!(Pattern::new("abc*abc*abc").matches("abcabcabcabcabcabcabc"));
586 assert!(!Pattern::new("abc*abc*abc").matches("abcabcabcabcabcabcabca"));
587 assert!(Pattern::new("a*a*a*a*a*a*a*a*a").matches("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"));
588 assert!(Pattern::new("a*b[xyz]c*d").matches("abxcdbxcddd"));
592 fn test_lots_of_files() {
593 // this is a good test because it touches lots of differently named files
594 glob("/*/*/*/*").skip(10000).next();
598 fn test_range_pattern() {
600 let pat = Pattern::new("a[0-9]b");
601 for i in range(0, 10) {
602 assert!(pat.matches(format!("a{}b", i)));
604 assert!(!pat.matches("a_b"));
606 let pat = Pattern::new("a[!0-9]b");
607 for i in range(0, 10) {
608 assert!(!pat.matches(format!("a{}b", i)));
610 assert!(pat.matches("a_b"));
612 let pats = ["[a-z123]", "[1a-z23]", "[123a-z]"];
613 for &p in pats.iter() {
614 let pat = Pattern::new(p);
615 for c in "abcdefghijklmnopqrstuvwxyz".chars() {
616 assert!(pat.matches(c.to_str()));
618 for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars() {
619 let options = MatchOptions {case_sensitive: false, .. MatchOptions::new()};
620 assert!(pat.matches_with(c.to_str(), options));
622 assert!(pat.matches("1"));
623 assert!(pat.matches("2"));
624 assert!(pat.matches("3"));
627 let pats = ["[abc-]", "[-abc]", "[a-c-]"];
628 for &p in pats.iter() {
629 let pat = Pattern::new(p);
630 assert!(pat.matches("a"));
631 assert!(pat.matches("b"));
632 assert!(pat.matches("c"));
633 assert!(pat.matches("-"));
634 assert!(!pat.matches("d"));
637 let pat = Pattern::new("[2-1]");
638 assert!(!pat.matches("1"));
639 assert!(!pat.matches("2"));
641 assert!(Pattern::new("[-]").matches("-"));
642 assert!(!Pattern::new("[!-]").matches("-"));
646 fn test_unclosed_bracket() {
647 // unclosed `[` should be treated literally
648 assert!(Pattern::new("abc[def").matches("abc[def"));
649 assert!(Pattern::new("abc[!def").matches("abc[!def"));
650 assert!(Pattern::new("abc[").matches("abc["));
651 assert!(Pattern::new("abc[!").matches("abc[!"));
652 assert!(Pattern::new("abc[d").matches("abc[d"));
653 assert!(Pattern::new("abc[!d").matches("abc[!d"));
654 assert!(Pattern::new("abc[]").matches("abc[]"));
655 assert!(Pattern::new("abc[!]").matches("abc[!]"));
659 fn test_pattern_matches() {
660 let txt_pat = Pattern::new("*hello.txt");
661 assert!(txt_pat.matches("hello.txt"));
662 assert!(txt_pat.matches("gareth_says_hello.txt"));
663 assert!(txt_pat.matches("some/path/to/hello.txt"));
664 assert!(txt_pat.matches("some\\path\\to\\hello.txt"));
665 assert!(txt_pat.matches("/an/absolute/path/to/hello.txt"));
666 assert!(!txt_pat.matches("hello.txt-and-then-some"));
667 assert!(!txt_pat.matches("goodbye.txt"));
669 let dir_pat = Pattern::new("*some/path/to/hello.txt");
670 assert!(dir_pat.matches("some/path/to/hello.txt"));
671 assert!(dir_pat.matches("a/bigger/some/path/to/hello.txt"));
672 assert!(!dir_pat.matches("some/path/to/hello.txt-and-then-some"));
673 assert!(!dir_pat.matches("some/other/path/to/hello.txt"));
677 fn test_pattern_escape() {
678 let s = "_[_]_?_*_!_";
679 assert_eq!(Pattern::escape(s), ~"_[[]_[]]_[?]_[*]_!_");
680 assert!(Pattern::new(Pattern::escape(s)).matches(s));
684 fn test_pattern_matches_case_insensitive() {
686 let pat = Pattern::new("aBcDeFg");
687 let options = MatchOptions {
688 case_sensitive: false,
689 require_literal_separator: false,
690 require_literal_leading_dot: false
693 assert!(pat.matches_with("aBcDeFg", options));
694 assert!(pat.matches_with("abcdefg", options));
695 assert!(pat.matches_with("ABCDEFG", options));
696 assert!(pat.matches_with("AbCdEfG", options));
700 fn test_pattern_matches_case_insensitive_range() {
702 let pat_within = Pattern::new("[a]");
703 let pat_except = Pattern::new("[!a]");
705 let options_case_insensitive = MatchOptions {
706 case_sensitive: false,
707 require_literal_separator: false,
708 require_literal_leading_dot: false
710 let options_case_sensitive = MatchOptions {
711 case_sensitive: true,
712 require_literal_separator: false,
713 require_literal_leading_dot: false
716 assert!(pat_within.matches_with("a", options_case_insensitive));
717 assert!(pat_within.matches_with("A", options_case_insensitive));
718 assert!(!pat_within.matches_with("A", options_case_sensitive));
720 assert!(!pat_except.matches_with("a", options_case_insensitive));
721 assert!(!pat_except.matches_with("A", options_case_insensitive));
722 assert!(pat_except.matches_with("A", options_case_sensitive));
726 fn test_pattern_matches_require_literal_separator() {
728 let options_require_literal = MatchOptions {
729 case_sensitive: true,
730 require_literal_separator: true,
731 require_literal_leading_dot: false
733 let options_not_require_literal = MatchOptions {
734 case_sensitive: true,
735 require_literal_separator: false,
736 require_literal_leading_dot: false
739 assert!(Pattern::new("abc/def").matches_with("abc/def", options_require_literal));
740 assert!(!Pattern::new("abc?def").matches_with("abc/def", options_require_literal));
741 assert!(!Pattern::new("abc*def").matches_with("abc/def", options_require_literal));
742 assert!(!Pattern::new("abc[/]def").matches_with("abc/def", options_require_literal));
744 assert!(Pattern::new("abc/def").matches_with("abc/def", options_not_require_literal));
745 assert!(Pattern::new("abc?def").matches_with("abc/def", options_not_require_literal));
746 assert!(Pattern::new("abc*def").matches_with("abc/def", options_not_require_literal));
747 assert!(Pattern::new("abc[/]def").matches_with("abc/def", options_not_require_literal));
751 fn test_pattern_matches_require_literal_leading_dot() {
753 let options_require_literal_leading_dot = MatchOptions {
754 case_sensitive: true,
755 require_literal_separator: false,
756 require_literal_leading_dot: true
758 let options_not_require_literal_leading_dot = MatchOptions {
759 case_sensitive: true,
760 require_literal_separator: false,
761 require_literal_leading_dot: false
764 let f = |options| Pattern::new("*.txt").matches_with(".hello.txt", options);
765 assert!(f(options_not_require_literal_leading_dot));
766 assert!(!f(options_require_literal_leading_dot));
768 let f = |options| Pattern::new(".*.*").matches_with(".hello.txt", options);
769 assert!(f(options_not_require_literal_leading_dot));
770 assert!(f(options_require_literal_leading_dot));
772 let f = |options| Pattern::new("aaa/bbb/*").matches_with("aaa/bbb/.ccc", options);
773 assert!(f(options_not_require_literal_leading_dot));
774 assert!(!f(options_require_literal_leading_dot));
776 let f = |options| Pattern::new("aaa/bbb/*").matches_with("aaa/bbb/c.c.c.", options);
777 assert!(f(options_not_require_literal_leading_dot));
778 assert!(f(options_require_literal_leading_dot));
780 let f = |options| Pattern::new("aaa/bbb/.*").matches_with("aaa/bbb/.ccc", options);
781 assert!(f(options_not_require_literal_leading_dot));
782 assert!(f(options_require_literal_leading_dot));
784 let f = |options| Pattern::new("aaa/?bbb").matches_with("aaa/.bbb", options);
785 assert!(f(options_not_require_literal_leading_dot));
786 assert!(!f(options_require_literal_leading_dot));
788 let f = |options| Pattern::new("aaa/[.]bbb").matches_with("aaa/.bbb", options);
789 assert!(f(options_not_require_literal_leading_dot));
790 assert!(!f(options_require_literal_leading_dot));
794 fn test_matches_path() {
795 // on windows, (Path::new("a/b").as_str().unwrap() == "a\\b"), so this
796 // tests that / and \ are considered equivalent on windows
797 assert!(Pattern::new("a/b").matches_path(&Path::new("a/b")));