auto merge of #12231 : wycats/rust/url_path_parse, r=alexcrichton

author bors <bors@rust-lang.org>

Wed, 19 Feb 2014 20:51:48 +0000 (12:51 -0800)

committer bors <bors@rust-lang.org>

Wed, 19 Feb 2014 20:51:48 +0000 (12:51 -0800)
author bors <bors@rust-lang.org>
Wed, 19 Feb 2014 20:51:48 +0000 (12:51 -0800)
committer bors <bors@rust-lang.org>
Wed, 19 Feb 2014 20:51:48 +0000 (12:51 -0800)
diff --combined src/libextra/url.rs

index c76c73dc4325bcab951ee93374537816927ff10f,580e11b1158740c2d2ec7241eb515bcf0dcf76be..4580dd93098831b4f18d9e19e0e073f70fd3c4fa
--- 1/src/libextra/url.rs
--- 2/src/libextra/url.rs
+++ b/src/libextra/url.rs
@@@ -1,4 -1,4 +1,4 @@@
- -// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
+ +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   // file at the top-level directory of this distribution and at
   // http://rust-lang.org/COPYRIGHT.
   //
@@@ -55,6 -55,17 +55,17 @@@ pub struct Url 
       fragment: Option<~str>
   }
   
+ /// The path, query and fragment components of a URL, held without a scheme
+ /// or authority part.
+ #[deriving(Clone, Eq)]
+ pub struct Path {
+     /// The path component of a URL, for example `/foo/bar`.
+     path: ~str,
+     /// The query component of a URL.  `~[(~"baz", ~"qux")]` represents the
+     /// query string `baz=qux`.
+     query: Query,
+     /// The fragment component, such as `quz`.  Doesn't include the leading `#` character.
+     fragment: Option<~str>
+ }
+ 
   /// An optional subcomponent of a URI authority component.
   #[deriving(Clone, Eq)]
   pub struct UserInfo {
@@@ -88,6 -99,19 +99,19 @@@ impl Url 
       }
   }
   
+ impl Path {
+     /// Builds a `Path` from its three components.  Each argument is stored
+     /// in the field of the same name without any further processing.
+     pub fn new(path: ~str,
+                query: Query,
+                fragment: Option<~str>)
+                -> Path {
+         Path {
+             path: path,
+             query: query,
+             fragment: fragment,
+         }
+     }
+ }
+ 
   impl UserInfo {
       #[inline]
       pub fn new(user: ~str, pass: Option<~str>) -> UserInfo {
@@@ -139,19 -163,10 +163,19 @@@ fn encode_inner(s: &str, full_url: bool
   }
   
   /**
- - * Encodes a URI by replacing reserved characters with percent encoded
+ + * Encodes a URI by replacing reserved characters with percent-encoded
    * character sequences.
    *
    * This function is compliant with RFC 3986.
+ + *
+ + * # Example
+ + *
+ + * ```rust
+ + * use extra::url::encode;
+ + *
+ + * let url = encode(&"https://example.com/Rust (programming language)");
+ + * println!("{}", url); // https://example.com/Rust%20(programming%20language)
+ + * ```
    */
   pub fn encode(s: &str) -> ~str {
       encode_inner(s, true)
@@@ -215,18 -230,9 +239,18 @@@ fn decode_inner(s: &str, full_url: bool
   }
   
   /**
- - * Decode a string encoded with percent encoding.
+ + * Decodes a percent-encoded string representing a URI.
    *
- - * This will only decode escape sequences generated by encode.
+ + * This will only decode escape sequences generated by `encode`.
+ + *
+ + * # Example
+ + *
+ + * ```rust
+ + * use extra::url::decode;
+ + *
+ + * let url = decode(&"https://example.com/Rust%20(programming%20language)");
+ + * println!("{}", url); // https://example.com/Rust (programming language)
+ + * ```
    */
   pub fn decode(s: &str) -> ~str {
       decode_inner(s, true)
@@@ -428,23 -434,7 +452,23 @@@ pub fn query_to_str(query: &Query) -> ~
       return strvec.connect("&");
   }
   
- -// returns the scheme and the rest of the url, or a parsing error
+ +/**
+ + * Returns a tuple of the URI scheme and the rest of the URI, or a parsing error.
+ + *
+ + * Does not include the separating `:` character.
+ + *
+ + * # Example
+ + *
+ + * ```rust
+ + * use extra::url::get_scheme;
+ + *
+ + * let scheme = match get_scheme("https://example.com/") {
+ + *     Ok((sch, _)) => sch,
+ + *     Err(_) => ~"(None)",
+ + * };
+ + * println!("Scheme in use: {}.", scheme); // Scheme in use: https.
+ + * ```
+ + */
   pub fn get_scheme(rawurl: &str) -> Result<(~str, ~str), ~str> {
       for (i,c) in rawurl.chars().enumerate() {
           match c {
@@@ -688,16 -678,18 +712,16 @@@ fn get_query_fragment(rawurl: &str) -
   }
   
   /**
- - * Parse a `str` to a `url`
+ + * Parses a URL, converting it from a string to `Url` representation.
    *
    * # Arguments
    *
- - * `rawurl` - a string representing a full url, including scheme.
+ + * `rawurl` - a string representing the full URL, including scheme.
    *
    * # Returns
    *
- - * a `url` that contains the parsed representation of the url.
- - *
+ + * A `Url` struct type representing the URL.
    */
- -
   pub fn from_str(rawurl: &str) -> Result<Url, ~str> {
       // scheme
       let (scheme, rest) = match get_scheme(rawurl) {
@@@ -727,6 -719,21 +751,21 @@@
       Ok(Url::new(scheme, userinfo, host, port, path, query, fragment))
   }
   
+ /**
+  * Parses a path string, converting it to `Path` representation.
+  *
+  * # Arguments
+  *
+  * `rawpath` - a string holding a path, optionally followed by a query
+  * and a fragment, for example `/doc/~u?s=v#something`.
+  *
+  * # Returns
+  *
+  * A `Path` on success, otherwise the error message reported by `get_path`
+  * or `get_query_fragment`.
+  */
+ pub fn path_from_str(rawpath: &str) -> Result<Path, ~str> {
+     // NOTE(review): the meaning of the `false` flag is defined by `get_path`,
+     // which is not shown here -- presumably "no authority precedes this
+     // path"; confirm against its signature.
+     let (path, rest) = match get_path(rawpath, false) {
+         Ok(val) => val,
+         Err(e) => return Err(e)
+     };
+ 
+     // query and fragment
+     let (query, fragment) = match get_query_fragment(rest) {
+         Ok(val) => val,
+         Err(e) => return Err(e),
+     };
+ 
+     Ok(Path{ path: path, query: query, fragment: fragment })
+ }
+ 
   impl FromStr for Url {
       fn from_str(s: &str) -> Option<Url> {
           match from_str(s) {
@@@ -736,19 -743,29 +775,28 @@@
       }
   }
   
+ impl FromStr for Path {
+     /// Parses `s` with `path_from_str`.  A parse failure becomes `None`, so
+     /// the error message is discarded; call `path_from_str` directly to
+     /// see it.
+     fn from_str(s: &str) -> Option<Path> {
+         match path_from_str(s) {
+             Ok(path) => Some(path),
+             Err(_) => None
+         }
+     }
+ }
+ 
   /**
- - * Format a `url` as a string
+ + * Converts a URL from `Url` to string representation.
    *
    * # Arguments
    *
- - * `url` - a url.
+ + * `url` - a URL.
    *
    * # Returns
    *
- - * a `str` that contains the formatted url. Note that this will usually
- - * be an inverse of `from_str` but might strip out unneeded separators.
+ + * A string that contains the formatted URL. Note that this will usually
+ + * be an inverse of `from_str` but might strip out unneeded separators;
    * for example, "http://somehost.com?", when parsed and formatted, will
    * result in just "http://somehost.com".
- - *
    */
   pub fn to_str(url: &Url) -> ~str {
       let user = match url.user {
@@@ -780,18 -797,45 +828,45 @@@
       format!("{}:{}{}{}{}", url.scheme, authority, url.path, query, fragment)
   }
   
+ /**
+  * Converts a path from `Path` to string representation.
+  *
+  * # Arguments
+  *
+  * `path` - a path.
+  *
+  * # Returns
+  *
+  * A string made of the path component, followed by `?` and the formatted
+  * query when the query is non-empty, followed by the fragment marker and
+  * the encoded fragment when a fragment is present.
+  */
+ pub fn path_to_str(path: &Path) -> ~str {
+     // An empty query contributes nothing, not even the `?` separator.
+     let query = if path.query.is_empty() {
+         ~""
+     } else {
+         format!("?{}", query_to_str(&path.query))
+     };
+ 
+     // NOTE(review): `\\#` presumably escapes `#` for `format!` so that a
+     // literal `#` is emitted -- confirm against the formatting syntax in use.
+     let fragment = match path.fragment {
+         Some(ref fragment) => format!("\\#{}", encode_component(*fragment)),
+         None => ~"",
+     };
+ 
+     format!("{}{}{}", path.path, query, fragment)
+ }
+ 
   impl ToStr for Url {
       fn to_str(&self) -> ~str {
           to_str(self)
       }
   }
   
+ impl ToStr for Path {
+     /// Formats the path by delegating to the free function `path_to_str`.
+     fn to_str(&self) -> ~str {
+         path_to_str(self)
+     }
+ }
+ 
   impl IterBytes for Url {
       fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool {
           self.to_str().iter_bytes(lsb0, f)
       }
   }
   
+ impl IterBytes for Path {
+     /// Iterates over the bytes of the path's string form (`to_str()`),
+     /// forwarding `lsb0` and `f` unchanged.
+     fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool {
+         self.to_str().iter_bytes(lsb0, f)
+     }
+ }
+ 
   // Put a few tests outside of the 'test' module so they can test the internal
   // functions and those functions don't need 'pub'
   
@@@ -899,6 -943,17 +974,17 @@@ mod tests 
           assert_eq!(&u.fragment, &Some(~"something"));
       }
   
+     #[test]
+     fn test_path_parse() {
+         // A path with both a query and a fragment splits into all three
+         // components; the `~` in the path is kept as-is.
+         let path = ~"/doc/~u?s=v#something";
+ 
+         let up = path_from_str(path);
+         let u = up.unwrap();
+         assert_eq!(&u.path, &~"/doc/~u");
+         assert_eq!(&u.query, &~[(~"s", ~"v")]);
+         assert_eq!(&u.fragment, &Some(~"something"));
+     }
+     }
+ 
       #[test]
       fn test_url_parse_host_slash() {
           let urlstr = ~"http://0.42.42.42/";
@@@ -907,6 -962,13 +993,13 @@@
           assert!(url.path == ~"/");
       }
   
+     #[test]
+     fn test_path_parse_host_slash() {
+         // A lone `/` parses to the path `/`.
+         let pathstr = ~"/";
+         let path = path_from_str(pathstr).unwrap();
+         assert!(path.path == ~"/");
+     }
+ 
       #[test]
       fn test_url_host_with_port() {
           let urlstr = ~"scheme://host:1234";
@@@ -930,6 -992,13 +1023,13 @@@
           assert!(url.path == ~"/file_name.html");
       }
   
+     #[test]
+     fn test_path_with_underscores() {
+         // Underscores in a path are accepted and preserved.
+         let pathstr = ~"/file_name.html";
+         let path = path_from_str(pathstr).unwrap();
+         assert!(path.path == ~"/file_name.html");
+     }
+ 
       #[test]
       fn test_url_with_dashes() {
           let urlstr = ~"http://dotcom.com/file-name.html";
@@@ -937,6 -1006,13 +1037,13 @@@
           assert!(url.path == ~"/file-name.html");
       }
   
+     #[test]
+     fn test_path_with_dashes() {
+         // Dashes in a path are accepted and preserved.
+         let pathstr = ~"/file-name.html";
+         let path = path_from_str(pathstr).unwrap();
+         assert!(path.path == ~"/file-name.html");
+     }
+ 
       #[test]
       fn test_no_scheme() {
           assert!(get_scheme("noschemehere.html").is_err());
@@@ -1017,6 -1093,14 +1124,14 @@@
           assert!(u.query == ~[(~"ba%d ", ~"#&+")]);
       }
   
+     #[test]
+     fn test_path_component_encoding() {
+         // Percent-escapes in both the path and the query are decoded while
+         // parsing: `%20` becomes a space, `%25` becomes `%`, and so on.
+         let path = ~"/doc%20uments?ba%25d%20=%23%26%2B";
+         let p = path_from_str(path).unwrap();
+         assert!(p.path == ~"/doc uments");
+         assert!(p.query == ~[(~"ba%d ", ~"#&+")]);
+     }
+ 
       #[test]
       fn test_url_without_authority() {
           let url = ~"mailto:test@email.com";
author	bors <bors@rust-lang.org>
	Wed, 19 Feb 2014 20:51:48 +0000 (12:51 -0800)
committer	bors <bors@rust-lang.org>
	Wed, 19 Feb 2014 20:51:48 +0000 (12:51 -0800)