diff options
Diffstat (limited to 'third_party/rust/url/src')
-rw-r--r-- | third_party/rust/url/src/encoding.rs | 135 | ||||
-rw-r--r-- | third_party/rust/url/src/form_urlencoded.rs | 364 | ||||
-rw-r--r-- | third_party/rust/url/src/host.rs | 418 | ||||
-rw-r--r-- | third_party/rust/url/src/lib.rs | 1456 | ||||
-rw-r--r-- | third_party/rust/url/src/origin.rs | 114 | ||||
-rw-r--r-- | third_party/rust/url/src/parser.rs | 1179 | ||||
-rw-r--r-- | third_party/rust/url/src/path_segments.rs | 187 | ||||
-rw-r--r-- | third_party/rust/url/src/percent_encoding.rs | 344 | ||||
-rw-r--r-- | third_party/rust/url/src/quirks.rs | 217 | ||||
-rw-r--r-- | third_party/rust/url/src/slicing.rs | 182 |
10 files changed, 4596 insertions, 0 deletions
// diff --git a/third_party/rust/url/src/encoding.rs b/third_party/rust/url/src/encoding.rs new file mode 100644 index 000000000..0703c788f --- /dev/null +++ b/third_party/rust/url/src/encoding.rs @@ -0,0 +1,135 @@
// Copyright 2013-2014 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.


//! Abstraction that conditionally compiles either to rust-encoding,
//! or to only support UTF-8.

#[cfg(feature = "query_encoding")] extern crate encoding;

use std::borrow::Cow;

#[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap};
#[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label;
#[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef;

/// Character encoding applied to query strings and form data
/// (variant built when the `query_encoding` feature is enabled).
#[cfg(feature = "query_encoding")]
#[derive(Copy, Clone)]
pub struct EncodingOverride {
    /// `None` means UTF-8.
    encoding: Option<EncodingRef>
}

#[cfg(feature = "query_encoding")]
impl EncodingOverride {
    /// Build an override from an optional encoding; `None` selects UTF-8.
    pub fn from_opt_encoding(encoding: Option<EncodingRef>) -> Self {
        match encoding {
            Some(requested) => Self::from_encoding(requested),
            None => Self::utf8(),
        }
    }

    /// Build an override from an encoding, storing UTF-8 as `None`.
    pub fn from_encoding(encoding: EncodingRef) -> Self {
        let stored = if encoding.name() == "utf-8" {
            None
        } else {
            Some(encoding)
        };
        EncodingOverride { encoding: stored }
    }

    /// The UTF-8 (i.e. "no override") value.
    #[inline]
    pub fn utf8() -> Self {
        EncodingOverride { encoding: None }
    }

    /// Look up an encoding by its WHATWG label, given as raw bytes.
    pub fn lookup(label: &[u8]) -> Option<Self> {
        // Don't use String::from_utf8_lossy since no encoding label contains U+FFFD
        // https://encoding.spec.whatwg.org/#names-and-labels
        match ::std::str::from_utf8(label) {
            Ok(text) => encoding_from_whatwg_label(text).map(Self::from_encoding),
            Err(_) => None,
        }
    }

    /// https://encoding.spec.whatwg.org/#get-an-output-encoding
    pub fn to_output_encoding(self) -> Self {
        match self.encoding {
            Some(encoding) if matches!(encoding.name(), "utf-16le" | "utf-16be") => Self::utf8(),
            _ => self,
        }
    }

    /// Whether this override stands for UTF-8.
    pub fn is_utf8(&self) -> bool {
        match self.encoding {
            None => true,
            Some(_) => false,
        }
    }

    /// Name of the selected encoding (`"utf-8"` when there is no override).
    pub fn name(&self) -> &'static str {
        self.encoding.map_or("utf-8", |encoding| encoding.name())
    }

    /// Decode bytes into text, replacing malformed sequences.
    pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
        let encoding = match self.encoding {
            Some(encoding) => encoding,
            None => return decode_utf8_lossy(input),
        };
        // `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace`
        let decoded = encoding.decode(&input, DecoderTrap::Replace).unwrap();
        Cow::Owned(decoded)
    }

    /// Encode text into bytes, using numeric character references
    /// for characters the encoding cannot represent.
    pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
        let encoding = match self.encoding {
            Some(encoding) => encoding,
            None => return encode_utf8(input),
        };
        // `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape`
        let encoded = encoding.encode(&input, EncoderTrap::NcrEscape).unwrap();
        Cow::Owned(encoded)
    }
}


/// UTF-8-only stand-in used when the `query_encoding` feature is disabled.
#[cfg(not(feature = "query_encoding"))]
#[derive(Copy, Clone)]
pub struct EncodingOverride;

#[cfg(not(feature = "query_encoding"))]
impl EncodingOverride {
    /// The only value available in this configuration.
    #[inline]
    pub fn utf8() -> Self {
        EncodingOverride
    }

    /// Decode bytes as UTF-8, replacing malformed sequences with U+FFFD.
    pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
        decode_utf8_lossy(input)
    }

    /// Return the UTF-8 bytes of the text.
    pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
        encode_utf8(input)
    }
}

/// Lossily decode UTF-8, keeping the input's borrow or allocation whenever
/// the bytes are already well-formed.
pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
    let bytes = match input {
        Cow::Borrowed(slice) => return String::from_utf8_lossy(slice),
        Cow::Owned(vec) => vec,
    };
    // Only a raw pointer survives this statement, so `bytes` is free to be moved below.
    let validated: *const [u8] = match String::from_utf8_lossy(&bytes) {
        Cow::Owned(repaired) => return Cow::Owned(repaired),
        Cow::Borrowed(text) => text.as_bytes(),
    };
    // from_utf8_lossy returned a borrow of `bytes` unchanged.
    debug_assert!(validated == &*bytes as *const [u8]);
    // Reuse the existing `Vec` allocation.
    // SAFETY: `from_utf8_lossy` just returned `bytes` as a borrowed `str`,
    // so the buffer is valid UTF-8.
    Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
}

/// View text as its UTF-8 bytes without copying.
pub fn encode_utf8(input: Cow<str>) -> Cow<[u8]> {
    match input {
        Cow::Owned(text) => Cow::Owned(text.into_bytes()),
        Cow::Borrowed(text) => Cow::Borrowed(text.as_bytes()),
    }
}
// diff --git a/third_party/rust/url/src/form_urlencoded.rs b/third_party/rust/url/src/form_urlencoded.rs new file mode 100644 index 000000000..f4a655507 --- /dev/null +++ b/third_party/rust/url/src/form_urlencoded.rs @@ -0,0 +1,364 @@ +// Copyright 2013-2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Parser and serializer for the [`application/x-www-form-urlencoded` syntax]( +//! http://url.spec.whatwg.org/#application/x-www-form-urlencoded), +//! as used by HTML forms. +//! +//! Converts between a string (such as an URL’s query string) +//! and a sequence of (name, value) pairs.
+ +use encoding::EncodingOverride; +use percent_encoding::{percent_encode_byte, percent_decode}; +use std::borrow::{Borrow, Cow}; +use std::str; + + +/// Convert a byte string in the `application/x-www-form-urlencoded` syntax +/// into a iterator of (name, value) pairs. +/// +/// Use `parse(input.as_bytes())` to parse a `&str` string. +/// +/// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be +/// converted to `[("#first", "%try%")]`. +#[inline] +pub fn parse(input: &[u8]) -> Parse { + Parse { + input: input, + encoding: EncodingOverride::utf8(), + } +} + + +/// Convert a byte string in the `application/x-www-form-urlencoded` syntax +/// into a iterator of (name, value) pairs. +/// +/// Use `parse(input.as_bytes())` to parse a `&str` string. +/// +/// This function is only available if the `query_encoding` Cargo feature is enabled. +/// +/// Arguments: +/// +/// * `encoding_override`: The character encoding each name and values is decoded as +/// after percent-decoding. Defaults to UTF-8. +/// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`. +#[cfg(feature = "query_encoding")] +pub fn parse_with_encoding<'a>(input: &'a [u8], + encoding_override: Option<::encoding::EncodingRef>, + use_charset: bool) + -> Result<Parse<'a>, ()> { + use std::ascii::AsciiExt; + + let mut encoding = EncodingOverride::from_opt_encoding(encoding_override); + if !(encoding.is_utf8() || input.is_ascii()) { + return Err(()) + } + if use_charset { + for sequence in input.split(|&b| b == b'&') { + // No '+' in "_charset_" to replace with ' '. 
+ if sequence.starts_with(b"_charset_=") { + let value = &sequence[b"_charset_=".len()..]; + // Skip replacing '+' with ' ' in value since no encoding label contains either: + // https://encoding.spec.whatwg.org/#names-and-labels + if let Some(e) = EncodingOverride::lookup(value) { + encoding = e; + break + } + } + } + } + Ok(Parse { + input: input, + encoding: encoding, + }) +} + +/// The return type of `parse()`. +#[derive(Copy, Clone)] +pub struct Parse<'a> { + input: &'a [u8], + encoding: EncodingOverride, +} + +impl<'a> Iterator for Parse<'a> { + type Item = (Cow<'a, str>, Cow<'a, str>); + + fn next(&mut self) -> Option<Self::Item> { + loop { + if self.input.is_empty() { + return None + } + let mut split2 = self.input.splitn(2, |&b| b == b'&'); + let sequence = split2.next().unwrap(); + self.input = split2.next().unwrap_or(&[][..]); + if sequence.is_empty() { + continue + } + let mut split2 = sequence.splitn(2, |&b| b == b'='); + let name = split2.next().unwrap(); + let value = split2.next().unwrap_or(&[][..]); + return Some(( + decode(name, self.encoding), + decode(value, self.encoding), + )) + } + } +} + +fn decode(input: &[u8], encoding: EncodingOverride) -> Cow<str> { + let replaced = replace_plus(input); + encoding.decode(match percent_decode(&replaced).if_any() { + Some(vec) => Cow::Owned(vec), + None => replaced, + }) +} + +/// Replace b'+' with b' ' +fn replace_plus<'a>(input: &'a [u8]) -> Cow<'a, [u8]> { + match input.iter().position(|&b| b == b'+') { + None => Cow::Borrowed(input), + Some(first_position) => { + let mut replaced = input.to_owned(); + replaced[first_position] = b' '; + for byte in &mut replaced[first_position + 1..] { + if *byte == b'+' { + *byte = b' '; + } + } + Cow::Owned(replaced) + } + } +} + +impl<'a> Parse<'a> { + /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`. 
+ pub fn into_owned(self) -> ParseIntoOwned<'a> { + ParseIntoOwned { inner: self } + } +} + +/// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`. +pub struct ParseIntoOwned<'a> { + inner: Parse<'a> +} + +impl<'a> Iterator for ParseIntoOwned<'a> { + type Item = (String, String); + + fn next(&mut self) -> Option<Self::Item> { + self.inner.next().map(|(k, v)| (k.into_owned(), v.into_owned())) + } +} + +/// The [`application/x-www-form-urlencoded` byte serializer]( +/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer). +/// +/// Return an iterator of `&str` slices. +pub fn byte_serialize(input: &[u8]) -> ByteSerialize { + ByteSerialize { + bytes: input, + } +} + +/// Return value of `byte_serialize()`. +pub struct ByteSerialize<'a> { + bytes: &'a [u8], +} + +fn byte_serialized_unchanged(byte: u8) -> bool { + matches!(byte, b'*' | b'-' | b'.' | b'0' ... b'9' | b'A' ... b'Z' | b'_' | b'a' ... b'z') +} + +impl<'a> Iterator for ByteSerialize<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if let Some((&first, tail)) = self.bytes.split_first() { + if !byte_serialized_unchanged(first) { + self.bytes = tail; + return Some(if first == b' ' { "+" } else { percent_encode_byte(first) }) + } + let position = tail.iter().position(|&b| !byte_serialized_unchanged(b)); + let (unchanged_slice, remaining) = match position { + // 1 for first_byte + i unchanged in tail + Some(i) => self.bytes.split_at(1 + i), + None => (self.bytes, &[][..]), + }; + self.bytes = remaining; + Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + if self.bytes.is_empty() { + (0, Some(0)) + } else { + (1, Some(self.bytes.len())) + } + } +} + +/// The [`application/x-www-form-urlencoded` serializer]( +/// https://url.spec.whatwg.org/#concept-urlencoded-serializer). 
+pub struct Serializer<T: Target> { + target: Option<T>, + start_position: usize, + encoding: EncodingOverride, +} + +pub trait Target { + fn as_mut_string(&mut self) -> &mut String; + fn finish(self) -> Self::Finished; + type Finished; +} + +impl Target for String { + fn as_mut_string(&mut self) -> &mut String { self } + fn finish(self) -> Self { self } + type Finished = Self; +} + +impl<'a> Target for &'a mut String { + fn as_mut_string(&mut self) -> &mut String { &mut **self } + fn finish(self) -> Self { self } + type Finished = Self; +} + +// `as_mut_string` string here exposes the internal serialization of an `Url`, +// which should not be exposed to users. +// We achieve that by not giving users direct access to `UrlQuery`: +// * Its fields are private +// (and so can not be constructed with struct literal syntax outside of this crate), +// * It has no constructor +// * It is only visible (on the type level) to users in the return type of +// `Url::query_pairs_mut` which is `Serializer<UrlQuery>` +// * `Serializer` keeps its target in a private field +// * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`. +impl<'a> Target for ::UrlQuery<'a> { + fn as_mut_string(&mut self) -> &mut String { &mut self.url.serialization } + fn finish(self) -> &'a mut ::Url { self.url } + type Finished = &'a mut ::Url; +} + +impl<T: Target> Serializer<T> { + /// Create a new `application/x-www-form-urlencoded` serializer for the given target. + /// + /// If the target is non-empty, + /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. + pub fn new(target: T) -> Self { + Self::for_suffix(target, 0) + } + + /// Create a new `application/x-www-form-urlencoded` serializer + /// for a suffix of the given target. + /// + /// If that suffix is non-empty, + /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. 
+ pub fn for_suffix(mut target: T, start_position: usize) -> Self { + &target.as_mut_string()[start_position..]; // Panic if out of bounds + Serializer { + target: Some(target), + start_position: start_position, + encoding: EncodingOverride::utf8(), + } + } + + /// Remove any existing name/value pair. + /// + /// Panics if called after `.finish()`. + pub fn clear(&mut self) -> &mut Self { + string(&mut self.target).truncate(self.start_position); + self + } + + /// Set the character encoding to be used for names and values before percent-encoding. + #[cfg(feature = "query_encoding")] + pub fn encoding_override(&mut self, new: Option<::encoding::EncodingRef>) -> &mut Self { + self.encoding = EncodingOverride::from_opt_encoding(new).to_output_encoding(); + self + } + + /// Serialize and append a name/value pair. + /// + /// Panics if called after `.finish()`. + pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self { + append_pair(string(&mut self.target), self.start_position, self.encoding, name, value); + self + } + + /// Serialize and append a number of name/value pairs. + /// + /// This simply calls `append_pair` repeatedly. + /// This can be more convenient, so the user doesn’t need to introduce a block + /// to limit the scope of `Serializer`’s borrow of its string. + /// + /// Panics if called after `.finish()`. + pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self + where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str> { + { + let string = string(&mut self.target); + for pair in iter { + let &(ref k, ref v) = pair.borrow(); + append_pair(string, self.start_position, self.encoding, k.as_ref(), v.as_ref()); + } + } + self + } + + /// Add a name/value pair whose name is `_charset_` + /// and whose value is the character encoding’s name. + /// (See the `encoding_override()` method.) + /// + /// Panics if called after `.finish()`. 
+ #[cfg(feature = "query_encoding")] + pub fn append_charset(&mut self) -> &mut Self { + { + let string = string(&mut self.target); + append_separator_if_needed(string, self.start_position); + string.push_str("_charset_="); + string.push_str(self.encoding.name()); + } + self + } + + /// If this serializer was constructed with a string, take and return that string. + /// + /// ```rust + /// use url::form_urlencoded; + /// let encoded: String = form_urlencoded::Serializer::new(String::new()) + /// .append_pair("foo", "bar & baz") + /// .append_pair("saison", "Été+hiver") + /// .finish(); + /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver"); + /// ``` + /// + /// Panics if called more than once. + pub fn finish(&mut self) -> T::Finished { + self.target.take().expect("url::form_urlencoded::Serializer double finish").finish() + } +} + +fn append_separator_if_needed(string: &mut String, start_position: usize) { + if string.len() > start_position { + string.push('&') + } +} + +fn string<T: Target>(target: &mut Option<T>) -> &mut String { + target.as_mut().expect("url::form_urlencoded::Serializer finished").as_mut_string() +} + +fn append_pair(string: &mut String, start_position: usize, encoding: EncodingOverride, + name: &str, value: &str) { + append_separator_if_needed(string, start_position); + string.extend(byte_serialize(&encoding.encode(name.into()))); + string.push('='); + string.extend(byte_serialize(&encoding.encode(value.into()))); +} diff --git a/third_party/rust/url/src/host.rs b/third_party/rust/url/src/host.rs new file mode 100644 index 000000000..47b049a27 --- /dev/null +++ b/third_party/rust/url/src/host.rs @@ -0,0 +1,418 @@ +// Copyright 2013-2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. 
This file may not be copied, modified, or distributed +// except according to those terms. + +#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; +use std::cmp; +use std::fmt::{self, Formatter}; +use std::io; +use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs}; +use std::vec; +use parser::{ParseResult, ParseError}; +use percent_encoding::percent_decode; +use idna; + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum HostInternal { + None, + Domain, + Ipv4(Ipv4Addr), + Ipv6(Ipv6Addr), +} + +#[cfg(feature = "heapsize")] +known_heap_size!(0, HostInternal); + +impl<S> From<Host<S>> for HostInternal { + fn from(host: Host<S>) -> HostInternal { + match host { + Host::Domain(_) => HostInternal::Domain, + Host::Ipv4(address) => HostInternal::Ipv4(address), + Host::Ipv6(address) => HostInternal::Ipv6(address), + } + } +} + +/// The host name of an URL. +#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Host<S=String> { + /// A DNS domain name, as '.' dot-separated labels. + /// Non-ASCII labels are encoded in punycode per IDNA. + Domain(S), + + /// An IPv4 address. + /// `Url::host_str` returns the serialization of this address, + /// as four decimal integers separated by `.` dots. + Ipv4(Ipv4Addr), + + /// An IPv6 address. + /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets, + /// in the format per [RFC 5952 *A Recommendation + /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952): + /// lowercase hexadecimal with maximal `::` compression. + Ipv6(Ipv6Addr), +} + +#[cfg(feature = "heapsize")] +impl<S: HeapSizeOf> HeapSizeOf for Host<S> { + fn heap_size_of_children(&self) -> usize { + match *self { + Host::Domain(ref s) => s.heap_size_of_children(), + _ => 0, + } + } +} + +impl<'a> Host<&'a str> { + /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`. 
+ pub fn to_owned(&self) -> Host<String> { + match *self { + Host::Domain(domain) => Host::Domain(domain.to_owned()), + Host::Ipv4(address) => Host::Ipv4(address), + Host::Ipv6(address) => Host::Ipv6(address), + } + } +} + +impl Host<String> { + /// Parse a host: either an IPv6 address in [] square brackets, or a domain. + /// + /// https://url.spec.whatwg.org/#host-parsing + pub fn parse(input: &str) -> Result<Self, ParseError> { + if input.starts_with("[") { + if !input.ends_with("]") { + return Err(ParseError::InvalidIpv6Address) + } + return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6) + } + let domain = percent_decode(input.as_bytes()).decode_utf8_lossy(); + let domain = try!(idna::domain_to_ascii(&domain)); + if domain.find(|c| matches!(c, + '\0' | '\t' | '\n' | '\r' | ' ' | '#' | '%' | '/' | ':' | '?' | '@' | '[' | '\\' | ']' + )).is_some() { + return Err(ParseError::InvalidDomainCharacter) + } + if let Some(address) = try!(parse_ipv4addr(&domain)) { + Ok(Host::Ipv4(address)) + } else { + Ok(Host::Domain(domain.into())) + } + } +} + +impl<S: AsRef<str>> fmt::Display for Host<S> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match *self { + Host::Domain(ref domain) => domain.as_ref().fmt(f), + Host::Ipv4(ref addr) => addr.fmt(f), + Host::Ipv6(ref addr) => { + try!(f.write_str("[")); + try!(write_ipv6(addr, f)); + f.write_str("]") + } + } + } +} + +/// This mostly exists because coherence rules don’t allow us to implement +/// `ToSocketAddrs for (Host<S>, u16)`. +pub struct HostAndPort<S=String> { + pub host: Host<S>, + pub port: u16, +} + +impl<'a> HostAndPort<&'a str> { + /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`. 
+ pub fn to_owned(&self) -> HostAndPort<String> { + HostAndPort { + host: self.host.to_owned(), + port: self.port + } + } +} + +impl<S: AsRef<str>> ToSocketAddrs for HostAndPort<S> { + type Iter = SocketAddrs; + + fn to_socket_addrs(&self) -> io::Result<Self::Iter> { + let port = self.port; + match self.host { + Host::Domain(ref domain) => Ok(SocketAddrs { + // FIXME: use std::net::lookup_host when it’s stable. + state: SocketAddrsState::Domain(try!((domain.as_ref(), port).to_socket_addrs())) + }), + Host::Ipv4(address) => Ok(SocketAddrs { + state: SocketAddrsState::One(SocketAddr::V4(SocketAddrV4::new(address, port))) + }), + Host::Ipv6(address) => Ok(SocketAddrs { + state: SocketAddrsState::One(SocketAddr::V6(SocketAddrV6::new(address, port, 0, 0))) + }), + } + } +} + +/// Socket addresses for an URL. +pub struct SocketAddrs { + state: SocketAddrsState +} + +enum SocketAddrsState { + Domain(vec::IntoIter<SocketAddr>), + One(SocketAddr), + Done, +} + +impl Iterator for SocketAddrs { + type Item = SocketAddr; + fn next(&mut self) -> Option<SocketAddr> { + match self.state { + SocketAddrsState::Domain(ref mut iter) => iter.next(), + SocketAddrsState::One(s) => { + self.state = SocketAddrsState::Done; + Some(s) + } + SocketAddrsState::Done => None + } + } +} + +fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter) -> fmt::Result { + let segments = addr.segments(); + let (compress_start, compress_end) = longest_zero_sequence(&segments); + let mut i = 0; + while i < 8 { + if i == compress_start { + try!(f.write_str(":")); + if i == 0 { + try!(f.write_str(":")); + } + if compress_end < 8 { + i = compress_end; + } else { + break; + } + } + try!(write!(f, "{:x}", segments[i as usize])); + if i < 7 { + try!(f.write_str(":")); + } + i += 1; + } + Ok(()) +} + +fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) { + let mut longest = -1; + let mut longest_length = -1; + let mut start = -1; + macro_rules! 
finish_sequence( + ($end: expr) => { + if start >= 0 { + let length = $end - start; + if length > longest_length { + longest = start; + longest_length = length; + } + } + }; + ); + for i in 0..8 { + if pieces[i as usize] == 0 { + if start < 0 { + start = i; + } + } else { + finish_sequence!(i); + start = -1; + } + } + finish_sequence!(8); + (longest, longest + longest_length) +} + + +fn parse_ipv4number(mut input: &str) -> Result<u32, ()> { + let mut r = 10; + if input.starts_with("0x") || input.starts_with("0X") { + input = &input[2..]; + r = 16; + } else if input.len() >= 2 && input.starts_with("0") { + input = &input[1..]; + r = 8; + } + if input.is_empty() { + return Ok(0); + } + if input.starts_with("+") { + return Err(()) + } + match u32::from_str_radix(&input, r) { + Ok(number) => Ok(number), + Err(_) => Err(()), + } +} + +fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> { + if input.is_empty() { + return Ok(None) + } + let mut parts: Vec<&str> = input.split('.').collect(); + if parts.last() == Some(&"") { + parts.pop(); + } + if parts.len() > 4 { + return Ok(None); + } + let mut numbers: Vec<u32> = Vec::new(); + for part in parts { + if part == "" { + return Ok(None); + } + if let Ok(n) = parse_ipv4number(part) { + numbers.push(n); + } else { + return Ok(None); + } + } + let mut ipv4 = numbers.pop().expect("a non-empty list of numbers"); + // Equivalent to: ipv4 >= 256 ** (4 − numbers.len()) + if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) { + return Err(ParseError::InvalidIpv4Address); + } + if numbers.iter().any(|x| *x > 255) { + return Err(ParseError::InvalidIpv4Address); + } + for (counter, n) in numbers.iter().enumerate() { + ipv4 += n << (8 * (3 - counter as u32)) + } + Ok(Some(Ipv4Addr::from(ipv4))) +} + + +fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> { + let input = input.as_bytes(); + let len = input.len(); + let mut is_ip_v4 = false; + let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0]; + let mut piece_pointer = 0; + let 
mut compress_pointer = None; + let mut i = 0; + + if len < 2 { + return Err(ParseError::InvalidIpv6Address) + } + + if input[0] == b':' { + if input[1] != b':' { + return Err(ParseError::InvalidIpv6Address) + } + i = 2; + piece_pointer = 1; + compress_pointer = Some(1); + } + + while i < len { + if piece_pointer == 8 { + return Err(ParseError::InvalidIpv6Address) + } + if input[i] == b':' { + if compress_pointer.is_some() { + return Err(ParseError::InvalidIpv6Address) + } + i += 1; + piece_pointer += 1; + compress_pointer = Some(piece_pointer); + continue + } + let start = i; + let end = cmp::min(len, start + 4); + let mut value = 0u16; + while i < end { + match (input[i] as char).to_digit(16) { + Some(digit) => { + value = value * 0x10 + digit as u16; + i += 1; + }, + None => break + } + } + if i < len { + match input[i] { + b'.' => { + if i == start { + return Err(ParseError::InvalidIpv6Address) + } + i = start; + is_ip_v4 = true; + }, + b':' => { + i += 1; + if i == len { + return Err(ParseError::InvalidIpv6Address) + } + }, + _ => return Err(ParseError::InvalidIpv6Address) + } + } + if is_ip_v4 { + break + } + pieces[piece_pointer] = value; + piece_pointer += 1; + } + + if is_ip_v4 { + if piece_pointer > 6 { + return Err(ParseError::InvalidIpv6Address) + } + let mut dots_seen = 0; + while i < len { + // FIXME: https://github.com/whatwg/url/commit/1c22aa119c354e0020117e02571cec53f7c01064 + let mut value = 0u16; + while i < len { + let digit = match input[i] { + c @ b'0' ... 
b'9' => c - b'0', + _ => break + }; + value = value * 10 + digit as u16; + if value == 0 || value > 255 { + return Err(ParseError::InvalidIpv6Address) + } + } + if dots_seen < 3 && !(i < len && input[i] == b'.') { + return Err(ParseError::InvalidIpv6Address) + } + pieces[piece_pointer] = pieces[piece_pointer] * 0x100 + value; + if dots_seen == 0 || dots_seen == 2 { + piece_pointer += 1; + } + i += 1; + if dots_seen == 3 && i < len { + return Err(ParseError::InvalidIpv6Address) + } + dots_seen += 1; + } + } + + match compress_pointer { + Some(compress_pointer) => { + let mut swaps = piece_pointer - compress_pointer; + piece_pointer = 7; + while swaps > 0 { + pieces[piece_pointer] = pieces[compress_pointer + swaps - 1]; + pieces[compress_pointer + swaps - 1] = 0; + swaps -= 1; + piece_pointer -= 1; + } + } + _ => if piece_pointer != 8 { + return Err(ParseError::InvalidIpv6Address) + } + } + Ok(Ipv6Addr::new(pieces[0], pieces[1], pieces[2], pieces[3], + pieces[4], pieces[5], pieces[6], pieces[7])) +} diff --git a/third_party/rust/url/src/lib.rs b/third_party/rust/url/src/lib.rs new file mode 100644 index 000000000..9378318b4 --- /dev/null +++ b/third_party/rust/url/src/lib.rs @@ -0,0 +1,1456 @@ +// Copyright 2013-2015 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! + +rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/) +for the [Rust](http://rust-lang.org/) programming language. + +It builds with [Cargo](http://crates.io/). 
+To use it in your project, add this to your `Cargo.toml` file: + +```Cargo +[dependencies.url] +git = "https://github.com/servo/rust-url" +``` + +Supporting encodings other than UTF-8 in query strings is an optional feature +that requires [rust-encoding](https://github.com/lifthrasiir/rust-encoding) +and is off by default. +You can enable it with +[Cargo’s *features* mechanism](http://doc.crates.io/manifest.html#the-[features]-section): + +```Cargo +[dependencies.url] +git = "https://github.com/servo/rust-url" +features = ["query_encoding"] +``` + +… or by passing `--cfg 'feature="query_encoding"'` to rustc. + + +# URL parsing and data structures + +First, URL parsing may fail for various reasons and therefore returns a `Result`. + +``` +use url::{Url, ParseError}; + +assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address)) +``` + +Let’s parse a valid URL and look at its components. + +``` +use url::{Url, Host}; + +let issue_list_url = Url::parse( + "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open" +).unwrap(); + + +assert!(issue_list_url.scheme() == "https"); +assert!(issue_list_url.username() == ""); +assert!(issue_list_url.password() == None); +assert!(issue_list_url.host_str() == Some("github.com")); +assert!(issue_list_url.host() == Some(Host::Domain("github.com"))); +assert!(issue_list_url.port() == None); +assert!(issue_list_url.path() == "/rust-lang/rust/issues"); +assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) == + Some(vec!["rust-lang", "rust", "issues"])); +assert!(issue_list_url.query() == Some("labels=E-easy&state=open")); +assert!(issue_list_url.fragment() == None); +assert!(!issue_list_url.cannot_be_a_base()); +``` + +Some URLs are said to be *cannot-be-a-base*: +they don’t have a username, password, host, or port, +and their "path" is an arbitrary string rather than slash-separated segments: + +``` +use url::Url; + +let data_url = Url::parse("data:text/plain,Hello?World#").unwrap(); + 
+assert!(data_url.cannot_be_a_base()); +assert!(data_url.scheme() == "data"); +assert!(data_url.path() == "text/plain,Hello"); +assert!(data_url.path_segments().is_none()); +assert!(data_url.query() == Some("World")); +assert!(data_url.fragment() == Some("")); +``` + + +# Base URL + +Many contexts allow URL *references* that can be relative to a *base URL*: + +```html +<link rel="stylesheet" href="../main.css"> +``` + +Since parsed URL are absolute, giving a base is required for parsing relative URLs: + +``` +use url::{Url, ParseError}; + +assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase)) +``` + +Use the `join` method on an `Url` to use it as a base URL: + +``` +use url::Url; + +let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap(); +let css_url = this_document.join("../main.css").unwrap(); +assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css") +*/ + +#[cfg(feature="rustc-serialize")] extern crate rustc_serialize; +#[macro_use] extern crate matches; +#[cfg(feature="serde")] extern crate serde; +#[cfg(feature="heapsize")] #[macro_use] extern crate heapsize; + +pub extern crate idna; + +use encoding::EncodingOverride; +#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; +use host::HostInternal; +use parser::{Parser, Context, SchemeType, to_u32}; +use percent_encoding::{PATH_SEGMENT_ENCODE_SET, USERINFO_ENCODE_SET, + percent_encode, percent_decode, utf8_percent_encode}; +use std::cmp; +use std::fmt::{self, Write}; +use std::hash; +use std::io; +use std::mem; +use std::net::{ToSocketAddrs, IpAddr}; +use std::ops::{Range, RangeFrom, RangeTo}; +use std::path::{Path, PathBuf}; +use std::str; + +pub use origin::{Origin, OpaqueOrigin}; +pub use host::{Host, HostAndPort, SocketAddrs}; +pub use path_segments::PathSegmentsMut; +pub use parser::ParseError; +pub use slicing::Position; + +mod encoding; +mod host; +mod origin; +mod path_segments; +mod parser; +mod slicing; + +pub mod 
form_urlencoded; +pub mod percent_encoding; +pub mod quirks; + +/// A parsed URL record. +#[derive(Clone)] +pub struct Url { + /// Syntax in pseudo-BNF: + /// + /// url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]? + /// non-hierarchical = non-hierarchical-path + /// non-hierarchical-path = /* Does not start with "/" */ + /// hierarchical = authority? hierarchical-path + /// authority = "//" userinfo? host [ ":" port ]? + /// userinfo = username [ ":" password ]? "@" + /// hierarchical-path = [ "/" path-segment ]+ + serialization: String, + + // Components + scheme_end: u32, // Before ':' + username_end: u32, // Before ':' (if a password is given) or '@' (if not) + host_start: u32, + host_end: u32, + host: HostInternal, + port: Option<u16>, + path_start: u32, // Before initial '/', if any + query_start: Option<u32>, // Before '?', unlike Position::QueryStart + fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart +} + +#[cfg(feature = "heapsize")] +impl HeapSizeOf for Url { + fn heap_size_of_children(&self) -> usize { + self.serialization.heap_size_of_children() + } +} + +/// Full configuration for the URL parser. +#[derive(Copy, Clone)] +pub struct ParseOptions<'a> { + base_url: Option<&'a Url>, + encoding_override: encoding::EncodingOverride, + log_syntax_violation: Option<&'a Fn(&'static str)>, +} + +impl<'a> ParseOptions<'a> { + /// Change the base URL + pub fn base_url(mut self, new: Option<&'a Url>) -> Self { + self.base_url = new; + self + } + + /// Override the character encoding of query strings. + /// This is a legacy concept only relevant for HTML. + #[cfg(feature = "query_encoding")] + pub fn encoding_override(mut self, new: Option<encoding::EncodingRef>) -> Self { + self.encoding_override = EncodingOverride::from_opt_encoding(new).to_output_encoding(); + self + } + + /// Call the provided function or closure on non-fatal parse errors. 
+ pub fn log_syntax_violation(mut self, new: Option<&'a Fn(&'static str)>) -> Self { + self.log_syntax_violation = new; + self + } + + /// Parse an URL string with the configuration so far. + pub fn parse(self, input: &str) -> Result<Url, ::ParseError> { + Parser { + serialization: String::with_capacity(input.len()), + base_url: self.base_url, + query_encoding_override: self.encoding_override, + log_syntax_violation: self.log_syntax_violation, + context: Context::UrlParser, + }.parse_url(input) + } +} + +impl Url { + /// Parse an absolute URL from a string. + #[inline] + pub fn parse(input: &str) -> Result<Url, ::ParseError> { + Url::options().parse(input) + } + + /// Parse a string as an URL, with this URL as the base URL. + #[inline] + pub fn join(&self, input: &str) -> Result<Url, ::ParseError> { + Url::options().base_url(Some(self)).parse(input) + } + + /// Return a default `ParseOptions` that can fully configure the URL parser. + pub fn options<'a>() -> ParseOptions<'a> { + ParseOptions { + base_url: None, + encoding_override: EncodingOverride::utf8(), + log_syntax_violation: None, + } + } + + /// Return the serialization of this URL. + /// + /// This is fast since that serialization is already stored in the `Url` struct. + #[inline] + pub fn as_str(&self) -> &str { + &self.serialization + } + + /// Return the serialization of this URL. + /// + /// This consumes the `Url` and takes ownership of the `String` stored in it. + #[inline] + pub fn into_string(self) -> String { + self.serialization + } + + /// For internal testing, not part of the public API. + /// + /// Methods of the `Url` struct assume a number of invariants. + /// This checks each of these invariants and panic if one is not met. + /// This is for testing rust-url itself. + #[doc(hidden)] + pub fn assert_invariants(&self) { + macro_rules! assert { + ($x: expr) => { + if !$x { + panic!("!( {} ) for URL {:?}", stringify!($x), self.serialization) + } + } + } + + macro_rules! 
assert_eq { + ($a: expr, $b: expr) => { + { + let a = $a; + let b = $b; + if a != b { + panic!("{:?} != {:?} ({} != {}) for URL {:?}", + a, b, stringify!($a), stringify!($b), self.serialization) + } + } + } + } + + assert!(self.scheme_end >= 1); + assert!(matches!(self.byte_at(0), b'a'...b'z' | b'A'...b'Z')); + assert!(self.slice(1..self.scheme_end).chars() + .all(|c| matches!(c, 'a'...'z' | 'A'...'Z' | '0'...'9' | '+' | '-' | '.'))); + assert_eq!(self.byte_at(self.scheme_end), b':'); + + if self.slice(self.scheme_end + 1 ..).starts_with("//") { + // URL with authority + match self.byte_at(self.username_end) { + b':' => { + assert!(self.host_start >= self.username_end + 2); + assert_eq!(self.byte_at(self.host_start - 1), b'@'); + } + b'@' => assert!(self.host_start == self.username_end + 1), + _ => assert_eq!(self.username_end, self.scheme_end + 3), + } + assert!(self.host_start >= self.username_end); + assert!(self.host_end >= self.host_start); + let host_str = self.slice(self.host_start..self.host_end); + match self.host { + HostInternal::None => assert_eq!(host_str, ""), + HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()), + HostInternal::Ipv6(address) => assert_eq!(host_str, format!("[{}]", address)), + HostInternal::Domain => { + if SchemeType::from(self.scheme()).is_special() { + assert!(!host_str.is_empty()) + } + } + } + if self.path_start == self.host_end { + assert_eq!(self.port, None); + } else { + assert_eq!(self.byte_at(self.host_end), b':'); + let port_str = self.slice(self.host_end + 1..self.path_start); + assert_eq!(self.port, Some(port_str.parse::<u16>().expect("Couldn't parse port?"))); + } + assert_eq!(self.byte_at(self.path_start), b'/'); + } else { + // Anarchist URL (no authority) + assert_eq!(self.username_end, self.scheme_end + 1); + assert_eq!(self.host_start, self.scheme_end + 1); + assert_eq!(self.host_end, self.scheme_end + 1); + assert_eq!(self.host, HostInternal::None); + assert_eq!(self.port, None); + 
assert_eq!(self.path_start, self.scheme_end + 1); + } + if let Some(start) = self.query_start { + assert!(start > self.path_start); + assert_eq!(self.byte_at(start), b'?'); + } + if let Some(start) = self.fragment_start { + assert!(start > self.path_start); + assert_eq!(self.byte_at(start), b'#'); + } + if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) { + assert!(fragment_start > query_start); + } + + let other = Url::parse(self.as_str()).expect("Failed to parse myself?"); + assert_eq!(&self.serialization, &other.serialization); + assert_eq!(self.scheme_end, other.scheme_end); + assert_eq!(self.username_end, other.username_end); + assert_eq!(self.host_start, other.host_start); + assert_eq!(self.host_end, other.host_end); + assert!(self.host == other.host || + // XXX No host round-trips to empty host. + // See https://github.com/whatwg/url/issues/79 + (self.host_str(), other.host_str()) == (None, Some(""))); + assert_eq!(self.port, other.port); + assert_eq!(self.path_start, other.path_start); + assert_eq!(self.query_start, other.query_start); + assert_eq!(self.fragment_start, other.fragment_start); + } + + /// Return the origin of this URL (https://url.spec.whatwg.org/#origin) + /// + /// Note: this returns an opaque origin for `file:` URLs, which causes + /// `url.origin() != url.origin()`. 
+ /// + /// # Examples + /// + /// URL with `ftp` scheme: + /// + /// ```rust + /// use url::{Host, Origin, Url}; + /// + /// let url = Url::parse("ftp://example.com/foo").unwrap(); + /// assert_eq!(url.origin(), + /// Origin::Tuple("ftp".into(), + /// Host::Domain("example.com".into()), + /// 21)); + /// ``` + /// + /// URL with `blob` scheme: + /// + /// ```rust + /// use url::{Host, Origin, Url}; + /// + /// let url = Url::parse("blob:https://example.com/foo").unwrap(); + /// assert_eq!(url.origin(), + /// Origin::Tuple("https".into(), + /// Host::Domain("example.com".into()), + /// 443)); + /// ``` + /// + /// URL with `file` scheme: + /// + /// ```rust + /// use url::{Host, Origin, Url}; + /// + /// let url = Url::parse("file:///tmp/foo").unwrap(); + /// assert!(!url.origin().is_tuple()); + /// + /// let other_url = Url::parse("file:///tmp/foo").unwrap(); + /// assert!(url.origin() != other_url.origin()); + /// ``` + /// + /// URL with other scheme: + /// + /// ```rust + /// use url::{Host, Origin, Url}; + /// + /// let url = Url::parse("foo:bar").unwrap(); + /// assert!(!url.origin().is_tuple()); + /// ``` + #[inline] + pub fn origin(&self) -> Origin { + origin::url_origin(self) + } + + /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// + /// let url = Url::parse("file:///tmp/foo").unwrap(); + /// assert_eq!(url.scheme(), "file"); + /// ``` + #[inline] + pub fn scheme(&self) -> &str { + self.slice(..self.scheme_end) + } + + /// Return whether the URL has an 'authority', + /// which can contain a username, password, host, and port number. + /// + /// URLs that do *not* are either path-only like `unix:/run/foo.socket` + /// or cannot-be-a-base like `data:text/plain,Stuff`. 
+ #[inline] + pub fn has_authority(&self) -> bool { + debug_assert!(self.byte_at(self.scheme_end) == b':'); + self.slice(self.scheme_end..).starts_with("://") + } + + /// Return whether this URL is a cannot-be-a-base URL, + /// meaning that parsing a relative URL string with this URL as the base will return an error. + /// + /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash, + /// as is typically the case of `data:` and `mailto:` URLs. + #[inline] + pub fn cannot_be_a_base(&self) -> bool { + self.byte_at(self.path_start) != b'/' + } + + /// Return the username for this URL (typically the empty string) + /// as a percent-encoded ASCII string. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// + /// let url = Url::parse("ftp://rms@example.com").unwrap(); + /// assert_eq!(url.username(), "rms"); + /// + /// let url = Url::parse("ftp://:secret123@example.com").unwrap(); + /// assert_eq!(url.username(), ""); + /// + /// let url = Url::parse("https://example.com").unwrap(); + /// assert_eq!(url.username(), ""); + /// ``` + pub fn username(&self) -> &str { + if self.has_authority() { + self.slice(self.scheme_end + ("://".len() as u32)..self.username_end) + } else { + "" + } + } + + /// Return the password for this URL, if any, as a percent-encoded ASCII string. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// + /// let url = Url::parse("ftp://rms:secret123@example.com").unwrap(); + /// assert_eq!(url.password(), Some("secret123")); + /// + /// let url = Url::parse("ftp://:secret123@example.com").unwrap(); + /// assert_eq!(url.password(), Some("secret123")); + /// + /// let url = Url::parse("ftp://rms@example.com").unwrap(); + /// assert_eq!(url.password(), None); + /// + /// let url = Url::parse("https://example.com").unwrap(); + /// assert_eq!(url.password(), None); + /// ``` + pub fn password(&self) -> Option<&str> { + // This ':' is not the one marking a port number since a host can not be empty. 
+ // (Except for file: URLs, which do not have port numbers.) + if self.has_authority() && self.byte_at(self.username_end) == b':' { + debug_assert!(self.byte_at(self.host_start - 1) == b'@'); + Some(self.slice(self.username_end + 1..self.host_start - 1)) + } else { + None + } + } + + /// Equivalent to `url.host().is_some()`. + pub fn has_host(&self) -> bool { + !matches!(self.host, HostInternal::None) + } + + /// Return the string representation of the host (domain or IP address) for this URL, if any. + /// + /// Non-ASCII domains are punycode-encoded per IDNA. + /// IPv6 addresses are given between `[` and `]` brackets. + /// + /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs + /// don’t have a host. + /// + /// See also the `host` method. + pub fn host_str(&self) -> Option<&str> { + if self.has_host() { + Some(self.slice(self.host_start..self.host_end)) + } else { + None + } + } + + /// Return the parsed representation of the host for this URL. + /// Non-ASCII domain labels are punycode-encoded per IDNA. + /// + /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs + /// don’t have a host. + /// + /// See also the `host_str` method. + pub fn host(&self) -> Option<Host<&str>> { + match self.host { + HostInternal::None => None, + HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))), + HostInternal::Ipv4(address) => Some(Host::Ipv4(address)), + HostInternal::Ipv6(address) => Some(Host::Ipv6(address)), + } + } + + /// If this URL has a host and it is a domain name (not an IP address), return it. + pub fn domain(&self) -> Option<&str> { + match self.host { + HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)), + _ => None, + } + } + + /// Return the port number for this URL, if any. + #[inline] + pub fn port(&self) -> Option<u16> { + self.port + } + + /// Return the port number for this URL, or the default port number if it is known. 
+ /// + /// This method only knows the default port number + /// of the `http`, `https`, `ws`, `wss`, `ftp`, and `gopher` schemes. + /// + /// For URLs in these schemes, this method always returns `Some(_)`. + /// For other schemes, it is the same as `Url::port()`. + #[inline] + pub fn port_or_known_default(&self) -> Option<u16> { + self.port.or_else(|| parser::default_port(self.scheme())) + } + + /// If the URL has a host, return something that implements `ToSocketAddrs`. + /// + /// If the URL has no port number and the scheme’s default port number is not known + /// (see `Url::port_or_known_default`), + /// the closure is called to obtain a port number. + /// Typically, this closure can match on the result `Url::scheme` + /// to have per-scheme default port numbers, + /// and panic for schemes it’s not prepared to handle. + /// For example: + /// + /// ```rust + /// # use url::Url; + /// # use std::net::TcpStream; + /// # use std::io; + /// + /// fn connect(url: &Url) -> io::Result<TcpStream> { + /// TcpStream::connect(try!(url.with_default_port(default_port))) + /// } + /// + /// fn default_port(url: &Url) -> Result<u16, ()> { + /// match url.scheme() { + /// "git" => Ok(9418), + /// "git+ssh" => Ok(22), + /// "git+https" => Ok(443), + /// "git+http" => Ok(80), + /// _ => Err(()), + /// } + /// } + /// ``` + pub fn with_default_port<F>(&self, f: F) -> io::Result<HostAndPort<&str>> + where F: FnOnce(&Url) -> Result<u16, ()> { + Ok(HostAndPort { + host: try!(self.host() + .ok_or(()) + .or_else(|()| io_error("URL has no host"))), + port: try!(self.port_or_known_default() + .ok_or(()) + .or_else(|()| f(self)) + .or_else(|()| io_error("URL has no port number"))) + }) + } + + /// Return the path for this URL, as a percent-encoded ASCII string. + /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'. + /// For other URLs, this starts with a '/' slash + /// and continues with slash-separated path segments. 
+ pub fn path(&self) -> &str { + match (self.query_start, self.fragment_start) { + (None, None) => self.slice(self.path_start..), + (Some(next_component_start), _) | + (None, Some(next_component_start)) => { + self.slice(self.path_start..next_component_start) + } + } + } + + /// Unless this URL is cannot-be-a-base, + /// return an iterator of '/' slash-separated path segments, + /// each as a percent-encoded ASCII string. + /// + /// Return `None` for cannot-be-a-base URLs. + /// + /// When `Some` is returned, the iterator always contains at least one string + /// (which may be empty). + pub fn path_segments(&self) -> Option<str::Split<char>> { + let path = self.path(); + if path.starts_with('/') { + Some(path[1..].split('/')) + } else { + None + } + } + + /// Return this URL’s query string, if any, as a percent-encoded ASCII string. + pub fn query(&self) -> Option<&str> { + match (self.query_start, self.fragment_start) { + (None, _) => None, + (Some(query_start), None) => { + debug_assert!(self.byte_at(query_start) == b'?'); + Some(self.slice(query_start + 1..)) + } + (Some(query_start), Some(fragment_start)) => { + debug_assert!(self.byte_at(query_start) == b'?'); + Some(self.slice(query_start + 1..fragment_start)) + } + } + } + + /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded` + /// and return an iterator of (key, value) pairs. + #[inline] + pub fn query_pairs(&self) -> form_urlencoded::Parse { + form_urlencoded::parse(self.query().unwrap_or("").as_bytes()) + } + + /// Return this URL’s fragment identifier, if any. + /// + /// **Note:** the parser did *not* percent-encode this component, + /// but the input may have been percent-encoded already. + pub fn fragment(&self) -> Option<&str> { + self.fragment_start.map(|start| { + debug_assert!(self.byte_at(start) == b'#'); + self.slice(start + 1..) 
+ }) + } + + fn mutate<F: FnOnce(&mut Parser) -> R, R>(&mut self, f: F) -> R { + let mut parser = Parser::for_setter(mem::replace(&mut self.serialization, String::new())); + let result = f(&mut parser); + self.serialization = parser.serialization; + result + } + + /// Change this URL’s fragment identifier. + pub fn set_fragment(&mut self, fragment: Option<&str>) { + // Remove any previous fragment + if let Some(start) = self.fragment_start { + debug_assert!(self.byte_at(start) == b'#'); + self.serialization.truncate(start as usize); + } + // Write the new one + if let Some(input) = fragment { + self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); + self.serialization.push('#'); + self.mutate(|parser| parser.parse_fragment(parser::Input::new(input))) + } else { + self.fragment_start = None + } + } + + fn take_fragment(&mut self) -> Option<String> { + self.fragment_start.take().map(|start| { + debug_assert!(self.byte_at(start) == b'#'); + let fragment = self.slice(start + 1..).to_owned(); + self.serialization.truncate(start as usize); + fragment + }) + } + + fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) { + if let Some(ref fragment) = fragment { + assert!(self.fragment_start.is_none()); + self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); + self.serialization.push('#'); + self.serialization.push_str(fragment); + } + } + + /// Change this URL’s query string. 
+ pub fn set_query(&mut self, query: Option<&str>) { + let fragment = self.take_fragment(); + + // Remove any previous query + if let Some(start) = self.query_start.take() { + debug_assert!(self.byte_at(start) == b'?'); + self.serialization.truncate(start as usize); + } + // Write the new query, if any + if let Some(input) = query { + self.query_start = Some(to_u32(self.serialization.len()).unwrap()); + self.serialization.push('?'); + let scheme_end = self.scheme_end; + self.mutate(|parser| parser.parse_query(scheme_end, parser::Input::new(input))); + } + + self.restore_already_parsed_fragment(fragment); + } + + /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs + /// in `application/x-www-form-urlencoded` syntax. + /// + /// The return value has a method-chaining API: + /// + /// ```rust + /// # use url::Url; + /// let mut url = Url::parse("https://example.net?lang=fr#nav").unwrap(); + /// assert_eq!(url.query(), Some("lang=fr")); + /// + /// url.query_pairs_mut().append_pair("foo", "bar"); + /// assert_eq!(url.query(), Some("lang=fr&foo=bar")); + /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav"); + /// + /// url.query_pairs_mut() + /// .clear() + /// .append_pair("foo", "bar & baz") + /// .append_pair("saisons", "Été+hiver"); + /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver")); + /// assert_eq!(url.as_str(), + /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav"); + /// ``` + /// + /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`, + /// not `url.set_query(None)`. + /// + /// The state of `Url` is unspecified if this return value is leaked without being dropped. 
+ pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<UrlQuery> { + let fragment = self.take_fragment(); + + let query_start; + if let Some(start) = self.query_start { + debug_assert!(self.byte_at(start) == b'?'); + query_start = start as usize; + } else { + query_start = self.serialization.len(); + self.query_start = Some(to_u32(query_start).unwrap()); + self.serialization.push('?'); + } + + let query = UrlQuery { url: self, fragment: fragment }; + form_urlencoded::Serializer::for_suffix(query, query_start + "?".len()) + } + + fn take_after_path(&mut self) -> String { + match (self.query_start, self.fragment_start) { + (Some(i), _) | (None, Some(i)) => { + let after_path = self.slice(i..).to_owned(); + self.serialization.truncate(i as usize); + after_path + }, + (None, None) => String::new(), + } + } + + /// Change this URL’s path. + pub fn set_path(&mut self, mut path: &str) { + let after_path = self.take_after_path(); + let old_after_path_pos = to_u32(self.serialization.len()).unwrap(); + let cannot_be_a_base = self.cannot_be_a_base(); + let scheme_type = SchemeType::from(self.scheme()); + self.serialization.truncate(self.path_start as usize); + self.mutate(|parser| { + if cannot_be_a_base { + if path.starts_with('/') { + parser.serialization.push_str("%2F"); + path = &path[1..]; + } + parser.parse_cannot_be_a_base_path(parser::Input::new(path)); + } else { + let mut has_host = true; // FIXME + parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path)); + } + }); + self.restore_after_path(old_after_path_pos, &after_path); + } + + /// Return an object with methods to manipulate this URL’s path segments. + /// + /// Return `Err(())` if this URl is cannot-be-a-base. 
+ pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut, ()> { + if self.cannot_be_a_base() { + Err(()) + } else { + Ok(path_segments::new(self)) + } + } + + fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) { + let new_after_path_position = to_u32(self.serialization.len()).unwrap(); + let adjust = |index: &mut u32| { + *index -= old_after_path_position; + *index += new_after_path_position; + }; + if let Some(ref mut index) = self.query_start { adjust(index) } + if let Some(ref mut index) = self.fragment_start { adjust(index) } + self.serialization.push_str(after_path) + } + + /// Change this URL’s port number. + /// + /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme; + /// do nothing and return `Err`. + pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> { + if !self.has_host() || self.scheme() == "file" { + return Err(()) + } + if port.is_some() && port == parser::default_port(self.scheme()) { + port = None + } + self.set_port_internal(port); + Ok(()) + } + + fn set_port_internal(&mut self, port: Option<u16>) { + match (self.port, port) { + (None, None) => {} + (Some(_), None) => { + self.serialization.drain(self.host_end as usize .. 
self.path_start as usize); + let offset = self.path_start - self.host_end; + self.path_start = self.host_end; + if let Some(ref mut index) = self.query_start { *index -= offset } + if let Some(ref mut index) = self.fragment_start { *index -= offset } + } + (Some(old), Some(new)) if old == new => {} + (_, Some(new)) => { + let path_and_after = self.slice(self.path_start..).to_owned(); + self.serialization.truncate(self.host_end as usize); + write!(&mut self.serialization, ":{}", new).unwrap(); + let old_path_start = self.path_start; + let new_path_start = to_u32(self.serialization.len()).unwrap(); + self.path_start = new_path_start; + let adjust = |index: &mut u32| { + *index -= old_path_start; + *index += new_path_start; + }; + if let Some(ref mut index) = self.query_start { adjust(index) } + if let Some(ref mut index) = self.fragment_start { adjust(index) } + self.serialization.push_str(&path_and_after); + } + } + self.port = port; + } + + /// Change this URL’s host. + /// + /// If this URL is cannot-be-a-base or there is an error parsing the given `host`, + /// do nothing and return `Err`. + /// + /// Removing the host (calling this with `None`) + /// will also remove any username, password, and port number. 
+ pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> { + if self.cannot_be_a_base() { + return Err(ParseError::SetHostOnCannotBeABaseUrl) + } + + if let Some(host) = host { + self.set_host_internal(try!(Host::parse(host)), None) + } else if self.has_host() { + debug_assert!(self.byte_at(self.scheme_end) == b':'); + debug_assert!(self.byte_at(self.path_start) == b'/'); + let new_path_start = self.scheme_end + 1; + self.serialization.drain(self.path_start as usize..new_path_start as usize); + let offset = self.path_start - new_path_start; + self.path_start = new_path_start; + self.username_end = new_path_start; + self.host_start = new_path_start; + self.host_end = new_path_start; + self.port = None; + if let Some(ref mut index) = self.query_start { *index -= offset } + if let Some(ref mut index) = self.fragment_start { *index -= offset } + } + Ok(()) + } + + /// opt_new_port: None means leave unchanged, Some(None) means remove any port number. + fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) { + let old_suffix_pos = if opt_new_port.is_some() { self.path_start } else { self.host_end }; + let suffix = self.slice(old_suffix_pos..).to_owned(); + self.serialization.truncate(self.host_start as usize); + if !self.has_authority() { + debug_assert!(self.slice(self.scheme_end..self.host_start) == ":"); + debug_assert!(self.username_end == self.host_start); + self.serialization.push('/'); + self.serialization.push('/'); + self.username_end += 2; + self.host_start += 2; + } + write!(&mut self.serialization, "{}", host).unwrap(); + self.host_end = to_u32(self.serialization.len()).unwrap(); + self.host = host.into(); + + if let Some(new_port) = opt_new_port { + self.port = new_port; + if let Some(port) = new_port { + write!(&mut self.serialization, ":{}", port).unwrap(); + } + } + let new_suffix_pos = to_u32(self.serialization.len()).unwrap(); + self.serialization.push_str(&suffix); + + let adjust = |index: &mut u32| { 
+ *index -= old_suffix_pos; + *index += new_suffix_pos; + }; + adjust(&mut self.path_start); + if let Some(ref mut index) = self.query_start { adjust(index) } + if let Some(ref mut index) = self.fragment_start { adjust(index) } + } + + /// Change this URL’s host to the given IP address. + /// + /// If this URL is cannot-be-a-base, do nothing and return `Err`. + /// + /// Compared to `Url::set_host`, this skips the host parser. + pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> { + if self.cannot_be_a_base() { + return Err(()) + } + + let address = match address { + IpAddr::V4(address) => Host::Ipv4(address), + IpAddr::V6(address) => Host::Ipv6(address), + }; + self.set_host_internal(address, None); + Ok(()) + } + + /// Change this URL’s password. + /// + /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. + pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> { + if !self.has_host() { + return Err(()) + } + if let Some(password) = password { + let host_and_after = self.slice(self.host_start..).to_owned(); + self.serialization.truncate(self.username_end as usize); + self.serialization.push(':'); + self.serialization.extend(utf8_percent_encode(password, USERINFO_ENCODE_SET)); + self.serialization.push('@'); + + let old_host_start = self.host_start; + let new_host_start = to_u32(self.serialization.len()).unwrap(); + let adjust = |index: &mut u32| { + *index -= old_host_start; + *index += new_host_start; + }; + self.host_start = new_host_start; + adjust(&mut self.host_end); + adjust(&mut self.path_start); + if let Some(ref mut index) = self.query_start { adjust(index) } + if let Some(ref mut index) = self.fragment_start { adjust(index) } + + self.serialization.push_str(&host_and_after); + } else if self.byte_at(self.username_end) == b':' { // If there is a password to remove + let has_username_or_password = self.byte_at(self.host_start - 1) == b'@'; + debug_assert!(has_username_or_password); + 
let username_start = self.scheme_end + 3; + let empty_username = username_start == self.username_end; + let start = self.username_end; // Remove the ':' + let end = if empty_username { + self.host_start // Remove the '@' as well + } else { + self.host_start - 1 // Keep the '@' to separate the username from the host + }; + self.serialization.drain(start as usize .. end as usize); + let offset = end - start; + self.host_start -= offset; + self.host_end -= offset; + self.path_start -= offset; + if let Some(ref mut index) = self.query_start { *index -= offset } + if let Some(ref mut index) = self.fragment_start { *index -= offset } + } + Ok(()) + } + + /// Change this URL’s username. + /// + /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. + pub fn set_username(&mut self, username: &str) -> Result<(), ()> { + if !self.has_host() { + return Err(()) + } + let username_start = self.scheme_end + 3; + debug_assert!(self.slice(self.scheme_end..username_start) == "://"); + if self.slice(username_start..self.username_end) == username { + return Ok(()) + } + let after_username = self.slice(self.username_end..).to_owned(); + self.serialization.truncate(username_start as usize); + self.serialization.extend(utf8_percent_encode(username, USERINFO_ENCODE_SET)); + + let mut removed_bytes = self.username_end; + self.username_end = to_u32(self.serialization.len()).unwrap(); + let mut added_bytes = self.username_end; + + let new_username_is_empty = self.username_end == username_start; + match (new_username_is_empty, after_username.chars().next()) { + (true, Some('@')) => { + removed_bytes += 1; + self.serialization.push_str(&after_username[1..]); + } + (false, Some('@')) | (_, Some(':')) | (true, _) => { + self.serialization.push_str(&after_username); + } + (false, _) => { + added_bytes += 1; + self.serialization.push('@'); + self.serialization.push_str(&after_username); + } + } + + let adjust = |index: &mut u32| { + *index -= removed_bytes; + 
*index += added_bytes; + }; + adjust(&mut self.host_start); + adjust(&mut self.host_end); + adjust(&mut self.path_start); + if let Some(ref mut index) = self.query_start { adjust(index) } + if let Some(ref mut index) = self.fragment_start { adjust(index) } + Ok(()) + } + + /// Change this URL’s scheme. + /// + /// Do nothing and return `Err` if: + /// * The new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+` + /// * This URL is cannot-be-a-base and the new scheme is one of + /// `http`, `https`, `ws`, `wss`, `ftp`, or `gopher` + pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { + let mut parser = Parser::for_setter(String::new()); + let remaining = try!(parser.parse_scheme(parser::Input::new(scheme))); + if !remaining.is_empty() || + (!self.has_host() && SchemeType::from(&parser.serialization).is_special()) { + return Err(()) + } + let old_scheme_end = self.scheme_end; + let new_scheme_end = to_u32(parser.serialization.len()).unwrap(); + let adjust = |index: &mut u32| { + *index -= old_scheme_end; + *index += new_scheme_end; + }; + + self.scheme_end = new_scheme_end; + adjust(&mut self.username_end); + adjust(&mut self.host_start); + adjust(&mut self.host_end); + adjust(&mut self.path_start); + if let Some(ref mut index) = self.query_start { adjust(index) } + if let Some(ref mut index) = self.fragment_start { adjust(index) } + + parser.serialization.push_str(self.slice(old_scheme_end..)); + self.serialization = parser.serialization; + Ok(()) + } + + /// Convert a file name as `std::path::Path` into an URL in the `file` scheme. + /// + /// This returns `Err` if the given path is not absolute or, + /// on Windows, if the prefix is not a disk prefix (e.g. `C:`). 
+ pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> { + let mut serialization = "file://".to_owned(); + let path_start = serialization.len() as u32; + try!(path_to_file_url_segments(path.as_ref(), &mut serialization)); + Ok(Url { + serialization: serialization, + scheme_end: "file".len() as u32, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start: path_start, + query_start: None, + fragment_start: None, + }) + } + + /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme. + /// + /// This returns `Err` if the given path is not absolute or, + /// on Windows, if the prefix is not a disk prefix (e.g. `C:`). + /// + /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash + /// so that the entire path is considered when using this URL as a base URL. + /// + /// For example: + /// + /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))` + /// as the base URL is `file:///var/www/index.html` + /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))` + /// as the base URL is `file:///var/index.html`, which might not be what was intended. + /// + /// Note that `std::path` does not consider trailing slashes significant + /// and usually does not include them (e.g. in `Path::parent()`). + pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> { + let mut url = try!(Url::from_file_path(path)); + if !url.serialization.ends_with('/') { + url.serialization.push('/') + } + Ok(url) + } + + /// Assuming the URL is in the `file` scheme or similar, + /// convert its path to an absolute `std::path::Path`. + /// + /// **Note:** This does not actually check the URL’s `scheme`, + /// and may give nonsensical results for other schemes. + /// It is the user’s responsibility to check the URL’s scheme before calling this. 
+ /// + /// ``` + /// # use url::Url; + /// # let url = Url::parse("file:///etc/passwd").unwrap(); + /// let path = url.to_file_path(); + /// ``` + /// + /// Returns `Err` if the host is neither empty nor `"localhost"`, + /// or if `Path::new_opt()` returns `None`. + /// (That is, if the percent-decoded path contains a NUL byte or, + /// for a Windows path, is not UTF-8.) + #[inline] + pub fn to_file_path(&self) -> Result<PathBuf, ()> { + // FIXME: Figure out what to do w.r.t host. + if matches!(self.host(), None | Some(Host::Domain("localhost"))) { + if let Some(segments) = self.path_segments() { + return file_url_segments_to_pathbuf(segments) + } + } + Err(()) + } + + // Private helper methods: + + #[inline] + fn slice<R>(&self, range: R) -> &str where R: RangeArg { + range.slice_of(&self.serialization) + } + + #[inline] + fn byte_at(&self, i: u32) -> u8 { + self.serialization.as_bytes()[i as usize] + } +} + +/// Return an error if `Url::host` or `Url::port_or_known_default` return `None`. +impl ToSocketAddrs for Url { + type Iter = SocketAddrs; + + fn to_socket_addrs(&self) -> io::Result<Self::Iter> { + try!(self.with_default_port(|_| Err(()))).to_socket_addrs() + } +} + +/// Parse a string as an URL, without a base URL or encoding override. +impl str::FromStr for Url { + type Err = ParseError; + + #[inline] + fn from_str(input: &str) -> Result<Url, ::ParseError> { + Url::parse(input) + } +} + +/// Display the serialization of this URL. +impl fmt::Display for Url { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.serialization, formatter) + } +} + +/// Debug the serialization of this URL. +impl fmt::Debug for Url { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(&self.serialization, formatter) + } +} + +/// URLs compare like their serialization. +impl Eq for Url {} + +/// URLs compare like their serialization. 
+impl PartialEq for Url { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.serialization == other.serialization + } +} + +/// URLs compare like their serialization. +impl Ord for Url { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + self.serialization.cmp(&other.serialization) + } +} + +/// URLs compare like their serialization. +impl PartialOrd for Url { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { + self.serialization.partial_cmp(&other.serialization) + } +} + +/// URLs hash like their serialization. +impl hash::Hash for Url { + #[inline] + fn hash<H>(&self, state: &mut H) where H: hash::Hasher { + hash::Hash::hash(&self.serialization, state) + } +} + +/// Return the serialization of this URL. +impl AsRef<str> for Url { + #[inline] + fn as_ref(&self) -> &str { + &self.serialization + } +} + +trait RangeArg { + fn slice_of<'a>(&self, s: &'a str) -> &'a str; +} + +impl RangeArg for Range<u32> { + #[inline] + fn slice_of<'a>(&self, s: &'a str) -> &'a str { + &s[self.start as usize .. self.end as usize] + } +} + +impl RangeArg for RangeFrom<u32> { + #[inline] + fn slice_of<'a>(&self, s: &'a str) -> &'a str { + &s[self.start as usize ..] + } +} + +impl RangeArg for RangeTo<u32> { + #[inline] + fn slice_of<'a>(&self, s: &'a str) -> &'a str { + &s[.. self.end as usize] + } +} + +#[cfg(feature="rustc-serialize")] +impl rustc_serialize::Encodable for Url { + fn encode<S: rustc_serialize::Encoder>(&self, encoder: &mut S) -> Result<(), S::Error> { + encoder.emit_str(self.as_str()) + } +} + + +#[cfg(feature="rustc-serialize")] +impl rustc_serialize::Decodable for Url { + fn decode<D: rustc_serialize::Decoder>(decoder: &mut D) -> Result<Url, D::Error> { + Url::parse(&*try!(decoder.read_str())).map_err(|error| { + decoder.error(&format!("URL parsing error: {}", error)) + }) + } +} + +/// Serializes this URL into a `serde` stream. +/// +/// This implementation is only available if the `serde` Cargo feature is enabled. 
+#[cfg(feature="serde")] +impl serde::Serialize for Url { + fn serialize<S>(&self, serializer: &mut S) -> Result<(), S::Error> where S: serde::Serializer { + format!("{}", self).serialize(serializer) + } +} + +/// Deserializes this URL from a `serde` stream. +/// +/// This implementation is only available if the `serde` Cargo feature is enabled. +#[cfg(feature="serde")] +impl serde::Deserialize for Url { + fn deserialize<D>(deserializer: &mut D) -> Result<Url, D::Error> where D: serde::Deserializer { + let string_representation: String = try!(serde::Deserialize::deserialize(deserializer)); + Ok(Url::parse(&string_representation).unwrap()) + } +} + +#[cfg(unix)] +fn path_to_file_url_segments(path: &Path, serialization: &mut String) -> Result<(), ()> { + use std::os::unix::prelude::OsStrExt; + if !path.is_absolute() { + return Err(()) + } + let mut empty = true; + // skip the root component + for component in path.components().skip(1) { + empty = false; + serialization.push('/'); + serialization.extend(percent_encode( + component.as_os_str().as_bytes(), PATH_SEGMENT_ENCODE_SET)); + } + if empty { + // An URL’s path must not be empty. + serialization.push('/'); + } + Ok(()) +} + +#[cfg(windows)] +fn path_to_file_url_segments(path: &Path, serialization: &mut String) -> Result<(), ()> { + path_to_file_url_segments_windows(path, serialization) +} + +// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 +#[cfg_attr(not(windows), allow(dead_code))] +fn path_to_file_url_segments_windows(path: &Path, serialization: &mut String) -> Result<(), ()> { + use std::path::{Prefix, Component}; + if !path.is_absolute() { + return Err(()) + } + let mut components = path.components(); + let disk = match components.next() { + Some(Component::Prefix(ref p)) => match p.kind() { + Prefix::Disk(byte) => byte, + Prefix::VerbatimDisk(byte) => byte, + _ => return Err(()), + }, + + // FIXME: do something with UNC and other prefixes? 
+ _ => return Err(()) + }; + + // Start with the prefix, e.g. "C:" + serialization.push('/'); + serialization.push(disk as char); + serialization.push(':'); + + for component in components { + if component == Component::RootDir { continue } + // FIXME: somehow work with non-unicode? + let component = try!(component.as_os_str().to_str().ok_or(())); + serialization.push('/'); + serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT_ENCODE_SET)); + } + Ok(()) +} + +#[cfg(unix)] +fn file_url_segments_to_pathbuf(segments: str::Split<char>) -> Result<PathBuf, ()> { + use std::ffi::OsStr; + use std::os::unix::prelude::OsStrExt; + use std::path::PathBuf; + + let mut bytes = Vec::new(); + for segment in segments { + bytes.push(b'/'); + bytes.extend(percent_decode(segment.as_bytes())); + } + let os_str = OsStr::from_bytes(&bytes); + let path = PathBuf::from(os_str); + debug_assert!(path.is_absolute(), + "to_file_path() failed to produce an absolute Path"); + Ok(path) +} + +#[cfg(windows)] +fn file_url_segments_to_pathbuf(segments: str::Split<char>) -> Result<PathBuf, ()> { + file_url_segments_to_pathbuf_windows(segments) +} + +// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 +#[cfg_attr(not(windows), allow(dead_code))] +fn file_url_segments_to_pathbuf_windows(mut segments: str::Split<char>) -> Result<PathBuf, ()> { + let first = try!(segments.next().ok_or(())); + if first.len() != 2 || !first.starts_with(parser::ascii_alpha) + || first.as_bytes()[1] != b':' { + return Err(()) + } + let mut string = first.to_owned(); + for segment in segments { + string.push('\\'); + + // Currently non-unicode windows paths cannot be represented + match String::from_utf8(percent_decode(segment.as_bytes()).collect()) { + Ok(s) => string.push_str(&s), + Err(..) 
=> return Err(()), + } + } + let path = PathBuf::from(string); + debug_assert!(path.is_absolute(), + "to_file_path() failed to produce an absolute Path"); + Ok(path) +} + +fn io_error<T>(reason: &str) -> io::Result<T> { + Err(io::Error::new(io::ErrorKind::InvalidData, reason)) +} + +/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly. +pub struct UrlQuery<'a> { + url: &'a mut Url, + fragment: Option<String>, +} + +impl<'a> Drop for UrlQuery<'a> { + fn drop(&mut self) { + self.url.restore_already_parsed_fragment(self.fragment.take()) + } +} diff --git a/third_party/rust/url/src/origin.rs b/third_party/rust/url/src/origin.rs new file mode 100644 index 000000000..2217c94fe --- /dev/null +++ b/third_party/rust/url/src/origin.rs @@ -0,0 +1,114 @@ +// Copyright 2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; +use host::Host; +use idna::domain_to_unicode; +use parser::default_port; +use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering}; +use Url; + +pub fn url_origin(url: &Url) -> Origin { + let scheme = url.scheme(); + match scheme { + "blob" => { + let result = Url::parse(url.path()); + match result { + Ok(ref url) => url_origin(url), + Err(_) => Origin::new_opaque() + } + }, + "ftp" | "gopher" | "http" | "https" | "ws" | "wss" => { + Origin::Tuple(scheme.to_owned(), url.host().unwrap().to_owned(), + url.port_or_known_default().unwrap()) + }, + // TODO: Figure out what to do if the scheme is a file + "file" => Origin::new_opaque(), + _ => Origin::new_opaque() + } +} + +/// The origin of an URL +#[derive(PartialEq, Eq, Clone, Debug)] +pub enum Origin { + /// A globally unique identifier + Opaque(OpaqueOrigin), + + /// Consists of the URL's scheme, host and port + Tuple(String, Host<String>, u16) +} + +#[cfg(feature = "heapsize")] +impl HeapSizeOf for Origin { + fn heap_size_of_children(&self) -> usize { + match *self { + Origin::Tuple(ref scheme, ref host, _) => { + scheme.heap_size_of_children() + + host.heap_size_of_children() + }, + _ => 0, + } + } +} + + +impl Origin { + /// Creates a new opaque origin that is only equal to itself. + pub fn new_opaque() -> Origin { + static COUNTER: AtomicUsize = ATOMIC_USIZE_INIT; + Origin::Opaque(OpaqueOrigin(COUNTER.fetch_add(1, Ordering::SeqCst))) + } + + /// Return whether this origin is a (scheme, host, port) tuple + /// (as opposed to an opaque origin). 
+ pub fn is_tuple(&self) -> bool { + matches!(*self, Origin::Tuple(..)) + } + + /// https://html.spec.whatwg.org/multipage/#ascii-serialisation-of-an-origin + pub fn ascii_serialization(&self) -> String { + match *self { + Origin::Opaque(_) => "null".to_owned(), + Origin::Tuple(ref scheme, ref host, port) => { + if default_port(scheme) == Some(port) { + format!("{}://{}", scheme, host) + } else { + format!("{}://{}:{}", scheme, host, port) + } + } + } + } + + /// https://html.spec.whatwg.org/multipage/#unicode-serialisation-of-an-origin + pub fn unicode_serialization(&self) -> String { + match *self { + Origin::Opaque(_) => "null".to_owned(), + Origin::Tuple(ref scheme, ref host, port) => { + let host = match *host { + Host::Domain(ref domain) => { + let (domain, _errors) = domain_to_unicode(domain); + Host::Domain(domain) + } + _ => host.clone() + }; + if default_port(scheme) == Some(port) { + format!("{}://{}", scheme, host) + } else { + format!("{}://{}:{}", scheme, host, port) + } + } + } + } +} + +/// Opaque identifier for URLs that have file or other schemes +#[derive(Eq, PartialEq, Clone, Debug)] +pub struct OpaqueOrigin(usize); + +#[cfg(feature = "heapsize")] +known_heap_size!(0, OpaqueOrigin); diff --git a/third_party/rust/url/src/parser.rs b/third_party/rust/url/src/parser.rs new file mode 100644 index 000000000..2945432fc --- /dev/null +++ b/third_party/rust/url/src/parser.rs @@ -0,0 +1,1179 @@ +// Copyright 2013-2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use std::ascii::AsciiExt; +use std::error::Error; +use std::fmt::{self, Formatter, Write}; +use std::str; + +use Url; +use encoding::EncodingOverride; +use host::{Host, HostInternal}; +use percent_encoding::{ + utf8_percent_encode, percent_encode, + SIMPLE_ENCODE_SET, DEFAULT_ENCODE_SET, USERINFO_ENCODE_SET, QUERY_ENCODE_SET, + PATH_SEGMENT_ENCODE_SET +}; + +pub type ParseResult<T> = Result<T, ParseError>; + +macro_rules! simple_enum_error { + ($($name: ident => $description: expr,)+) => { + /// Errors that can occur during parsing. + #[derive(PartialEq, Eq, Clone, Copy, Debug)] + pub enum ParseError { + $( + $name, + )+ + } + + impl Error for ParseError { + fn description(&self) -> &str { + match *self { + $( + ParseError::$name => $description, + )+ + } + } + } + } +} + +simple_enum_error! { + EmptyHost => "empty host", + IdnaError => "invalid international domain name", + InvalidPort => "invalid port number", + InvalidIpv4Address => "invalid IPv4 address", + InvalidIpv6Address => "invalid IPv6 address", + InvalidDomainCharacter => "invalid domain character", + RelativeUrlWithoutBase => "relative URL without a base", + RelativeUrlWithCannotBeABaseBase => "relative URL with a cannot-be-a-base base", + SetHostOnCannotBeABaseUrl => "a cannot-be-a-base URL doesn’t have a host to set", + Overflow => "URLs more than 4 GB are not supported", +} + +impl fmt::Display for ParseError { + fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { + self.description().fmt(fmt) + } +} + +impl From<::idna::uts46::Errors> for ParseError { + fn from(_: ::idna::uts46::Errors) -> ParseError { ParseError::IdnaError } +} + +#[derive(Copy, Clone)] +pub enum SchemeType { + File, + SpecialNotFile, + NotSpecial, +} + +impl SchemeType { + pub fn is_special(&self) -> bool { + !matches!(*self, SchemeType::NotSpecial) + } + + pub fn is_file(&self) -> bool { + matches!(*self, SchemeType::File) + } + + pub fn from(s: &str) -> Self { + match s { + "http" | "https" | "ws" | "wss" | "ftp" | "gopher" 
=> SchemeType::SpecialNotFile, + "file" => SchemeType::File, + _ => SchemeType::NotSpecial, + } + } +} + +pub fn default_port(scheme: &str) -> Option<u16> { + match scheme { + "http" | "ws" => Some(80), + "https" | "wss" => Some(443), + "ftp" => Some(21), + "gopher" => Some(70), + _ => None, + } +} + +#[derive(Clone)] +pub struct Input<'i> { + chars: str::Chars<'i>, +} + +impl<'i> Input<'i> { + pub fn new(input: &'i str) -> Self { + Input::with_log(input, None) + } + + pub fn with_log(original_input: &'i str, log_syntax_violation: Option<&Fn(&'static str)>) + -> Self { + let input = original_input.trim_matches(c0_control_or_space); + if let Some(log) = log_syntax_violation { + if input.len() < original_input.len() { + log("leading or trailing control or space character are ignored in URLs") + } + if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) { + log("tabs or newlines are ignored in URLs") + } + } + Input { chars: input.chars() } + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.clone().next().is_none() + } + + #[inline] + fn starts_with<P: Pattern>(&self, p: P) -> bool { + p.split_prefix(&mut self.clone()) + } + + #[inline] + pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> { + let mut remaining = self.clone(); + if p.split_prefix(&mut remaining) { + Some(remaining) + } else { + None + } + } + + #[inline] + fn split_first(&self) -> (Option<char>, Self) { + let mut remaining = self.clone(); + (remaining.next(), remaining) + } + + #[inline] + fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) { + let mut count = 0; + let mut remaining = self.clone(); + loop { + let mut input = remaining.clone(); + if matches!(input.next(), Some(c) if f(c)) { + remaining = input; + count += 1; + } else { + return (count, remaining) + } + } + } + + #[inline] + fn next_utf8(&mut self) -> Option<(char, &'i str)> { + loop { + let utf8 = self.chars.as_str(); + match self.chars.next() { + Some(c) => { + if !matches!(c, '\t' | '\n' | '\r') { 
+ return Some((c, &utf8[..c.len_utf8()])) + } + } + None => return None + } + } + } +} + +pub trait Pattern { + fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool; +} + +impl Pattern for char { + fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { input.next() == Some(self) } +} + +impl<'a> Pattern for &'a str { + fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { + for c in self.chars() { + if input.next() != Some(c) { + return false + } + } + true + } +} + +impl<F: FnMut(char) -> bool> Pattern for F { + fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { input.next().map_or(false, self) } +} + +impl<'i> Iterator for Input<'i> { + type Item = char; + fn next(&mut self) -> Option<char> { + self.chars.by_ref().filter(|&c| !matches!(c, '\t' | '\n' | '\r')).next() + } +} + +pub struct Parser<'a> { + pub serialization: String, + pub base_url: Option<&'a Url>, + pub query_encoding_override: EncodingOverride, + pub log_syntax_violation: Option<&'a Fn(&'static str)>, + pub context: Context, +} + +#[derive(PartialEq, Eq, Copy, Clone)] +pub enum Context { + UrlParser, + Setter, + PathSegmentSetter, +} + +impl<'a> Parser<'a> { + pub fn for_setter(serialization: String) -> Parser<'a> { + Parser { + serialization: serialization, + base_url: None, + query_encoding_override: EncodingOverride::utf8(), + log_syntax_violation: None, + context: Context::Setter, + } + } + + fn syntax_violation(&self, reason: &'static str) { + if let Some(log) = self.log_syntax_violation { + log(reason) + } + } + + fn syntax_violation_if<F: Fn() -> bool>(&self, reason: &'static str, test: F) { + // Skip test if not logging. 
+ if let Some(log) = self.log_syntax_violation { + if test() { + log(reason) + } + } + } + + /// https://url.spec.whatwg.org/#concept-basic-url-parser + pub fn parse_url(mut self, input: &str) -> ParseResult<Url> { + let input = Input::with_log(input, self.log_syntax_violation); + if let Ok(remaining) = self.parse_scheme(input.clone()) { + return self.parse_with_scheme(remaining) + } + + // No-scheme state + if let Some(base_url) = self.base_url { + if input.starts_with('#') { + self.fragment_only(base_url, input) + } else if base_url.cannot_be_a_base() { + Err(ParseError::RelativeUrlWithCannotBeABaseBase) + } else { + let scheme_type = SchemeType::from(base_url.scheme()); + if scheme_type.is_file() { + self.parse_file(input, Some(base_url)) + } else { + self.parse_relative(input, scheme_type, base_url) + } + } + } else { + Err(ParseError::RelativeUrlWithoutBase) + } + } + + pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()> { + if input.is_empty() || !input.starts_with(ascii_alpha) { + return Err(()) + } + debug_assert!(self.serialization.is_empty()); + while let Some(c) = input.next() { + match c { + 'a'...'z' | 'A'...'Z' | '0'...'9' | '+' | '-' | '.' 
=> { + self.serialization.push(c.to_ascii_lowercase()) + } + ':' => return Ok(input), + _ => { + self.serialization.clear(); + return Err(()) + } + } + } + // EOF before ':' + if self.context == Context::Setter { + Ok(input) + } else { + self.serialization.clear(); + Err(()) + } + } + + fn parse_with_scheme(mut self, input: Input) -> ParseResult<Url> { + let scheme_end = try!(to_u32(self.serialization.len())); + let scheme_type = SchemeType::from(&self.serialization); + self.serialization.push(':'); + match scheme_type { + SchemeType::File => { + self.syntax_violation_if("expected // after file:", || !input.starts_with("//")); + let base_file_url = self.base_url.and_then(|base| { + if base.scheme() == "file" { Some(base) } else { None } + }); + self.serialization.clear(); + self.parse_file(input, base_file_url) + } + SchemeType::SpecialNotFile => { + // special relative or authority state + let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\')); + if let Some(base_url) = self.base_url { + if slashes_count < 2 && + base_url.scheme() == &self.serialization[..scheme_end as usize] { + // "Cannot-be-a-base" URLs only happen with "not special" schemes. + debug_assert!(!base_url.cannot_be_a_base()); + self.serialization.clear(); + return self.parse_relative(input, scheme_type, base_url) + } + } + // special authority slashes state + self.syntax_violation_if("expected //", || { + input.clone().take_while(|&c| matches!(c, '/' | '\\')) + .collect::<String>() != "//" + }); + self.after_double_slash(remaining, scheme_type, scheme_end) + } + SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end) + } + } + + /// Scheme other than file, http, https, ws, ws, ftp, gopher. 
+ fn parse_non_special(mut self, input: Input, scheme_type: SchemeType, scheme_end: u32) + -> ParseResult<Url> { + // path or authority state ( + if let Some(input) = input.split_prefix("//") { + return self.after_double_slash(input, scheme_type, scheme_end) + } + // Anarchist URL (no authority) + let path_start = try!(to_u32(self.serialization.len())); + let username_end = path_start; + let host_start = path_start; + let host_end = path_start; + let host = HostInternal::None; + let port = None; + let remaining = if let Some(input) = input.split_prefix('/') { + let path_start = self.serialization.len(); + self.serialization.push('/'); + self.parse_path(scheme_type, &mut false, path_start, input) + } else { + self.parse_cannot_be_a_base_path(input) + }; + self.with_query_and_fragment(scheme_end, username_end, host_start, + host_end, host, port, path_start, remaining) + } + + fn parse_file(mut self, input: Input, mut base_file_url: Option<&Url>) -> ParseResult<Url> { + // file state + debug_assert!(self.serialization.is_empty()); + let (first_char, input_after_first_char) = input.split_first(); + match first_char { + None => { + if let Some(base_url) = base_file_url { + // Copy everything except the fragment + let before_fragment = match base_url.fragment_start { + Some(i) => &base_url.serialization[..i as usize], + None => &*base_url.serialization, + }; + self.serialization.push_str(before_fragment); + Ok(Url { + serialization: self.serialization, + fragment_start: None, + ..*base_url + }) + } else { + self.serialization.push_str("file:///"); + let scheme_end = "file".len() as u32; + let path_start = "file://".len() as u32; + Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start: path_start, + query_start: None, + fragment_start: None, + }) + } + }, + Some('?') => { + if let Some(base_url) = base_file_url { + // Copy 
everything up to the query string + let before_query = match (base_url.query_start, base_url.fragment_start) { + (None, None) => &*base_url.serialization, + (Some(i), _) | + (None, Some(i)) => base_url.slice(..i) + }; + self.serialization.push_str(before_query); + let (query_start, fragment_start) = + try!(self.parse_query_and_fragment(base_url.scheme_end, input)); + Ok(Url { + serialization: self.serialization, + query_start: query_start, + fragment_start: fragment_start, + ..*base_url + }) + } else { + self.serialization.push_str("file:///"); + let scheme_end = "file".len() as u32; + let path_start = "file://".len() as u32; + let (query_start, fragment_start) = + try!(self.parse_query_and_fragment(scheme_end, input)); + Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start: path_start, + query_start: query_start, + fragment_start: fragment_start, + }) + } + }, + Some('#') => { + if let Some(base_url) = base_file_url { + self.fragment_only(base_url, input) + } else { + self.serialization.push_str("file:///"); + let scheme_end = "file".len() as u32; + let path_start = "file://".len() as u32; + let fragment_start = "file:///".len() as u32; + self.parse_fragment(input_after_first_char); + Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start: path_start, + query_start: None, + fragment_start: Some(fragment_start), + }) + } + } + Some('/') | Some('\\') => { + self.syntax_violation_if("backslash", || first_char == Some('\\')); + // file slash state + let (next_char, input_after_next_char) = input_after_first_char.split_first(); + self.syntax_violation_if("backslash", || next_char == Some('\\')); + if matches!(next_char, Some('/') | Some('\\')) { + // file host state + 
self.serialization.push_str("file://"); + let scheme_end = "file".len() as u32; + let host_start = "file://".len() as u32; + let (path_start, host, remaining) = + try!(self.parse_file_host(input_after_next_char)); + let host_end = try!(to_u32(self.serialization.len())); + let mut has_host = !matches!(host, HostInternal::None); + let remaining = if path_start { + self.parse_path_start(SchemeType::File, &mut has_host, remaining) + } else { + let path_start = self.serialization.len(); + self.serialization.push('/'); + self.parse_path(SchemeType::File, &mut has_host, path_start, remaining) + }; + // FIXME: deal with has_host + let (query_start, fragment_start) = + try!(self.parse_query_and_fragment(scheme_end, remaining)); + Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: host_start, + host_start: host_start, + host_end: host_end, + host: host, + port: None, + path_start: host_end, + query_start: query_start, + fragment_start: fragment_start, + }) + } else { + self.serialization.push_str("file:///"); + let scheme_end = "file".len() as u32; + let path_start = "file://".len(); + if let Some(base_url) = base_file_url { + let first_segment = base_url.path_segments().unwrap().next().unwrap(); + // FIXME: *normalized* drive letter + if is_windows_drive_letter(first_segment) { + self.serialization.push_str(first_segment); + self.serialization.push('/'); + } + } + let remaining = self.parse_path( + SchemeType::File, &mut false, path_start, input_after_first_char); + let (query_start, fragment_start) = + try!(self.parse_query_and_fragment(scheme_end, remaining)); + let path_start = path_start as u32; + Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start: path_start, + query_start: query_start, + fragment_start: fragment_start, + }) + } + } + _ => { + if 
starts_with_windows_drive_letter_segment(&input) { + base_file_url = None; + } + if let Some(base_url) = base_file_url { + let before_query = match (base_url.query_start, base_url.fragment_start) { + (None, None) => &*base_url.serialization, + (Some(i), _) | + (None, Some(i)) => base_url.slice(..i) + }; + self.serialization.push_str(before_query); + self.pop_path(SchemeType::File, base_url.path_start as usize); + let remaining = self.parse_path( + SchemeType::File, &mut true, base_url.path_start as usize, input); + self.with_query_and_fragment( + base_url.scheme_end, base_url.username_end, base_url.host_start, + base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining) + } else { + self.serialization.push_str("file:///"); + let scheme_end = "file".len() as u32; + let path_start = "file://".len(); + let remaining = self.parse_path( + SchemeType::File, &mut false, path_start, input); + let (query_start, fragment_start) = + try!(self.parse_query_and_fragment(scheme_end, remaining)); + let path_start = path_start as u32; + Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start: path_start, + query_start: query_start, + fragment_start: fragment_start, + }) + } + } + } + } + + fn parse_relative(mut self, input: Input, scheme_type: SchemeType, base_url: &Url) + -> ParseResult<Url> { + // relative state + debug_assert!(self.serialization.is_empty()); + let (first_char, input_after_first_char) = input.split_first(); + match first_char { + None => { + // Copy everything except the fragment + let before_fragment = match base_url.fragment_start { + Some(i) => &base_url.serialization[..i as usize], + None => &*base_url.serialization, + }; + self.serialization.push_str(before_fragment); + Ok(Url { + serialization: self.serialization, + fragment_start: None, + ..*base_url + }) + }, + Some('?') => { + // Copy 
everything up to the query string + let before_query = match (base_url.query_start, base_url.fragment_start) { + (None, None) => &*base_url.serialization, + (Some(i), _) | + (None, Some(i)) => base_url.slice(..i) + }; + self.serialization.push_str(before_query); + let (query_start, fragment_start) = + try!(self.parse_query_and_fragment(base_url.scheme_end, input)); + Ok(Url { + serialization: self.serialization, + query_start: query_start, + fragment_start: fragment_start, + ..*base_url + }) + }, + Some('#') => self.fragment_only(base_url, input), + Some('/') | Some('\\') => { + let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\')); + if slashes_count >= 2 { + self.syntax_violation_if("expected //", || { + input.clone().take_while(|&c| matches!(c, '/' | '\\')) + .collect::<String>() != "//" + }); + let scheme_end = base_url.scheme_end; + debug_assert!(base_url.byte_at(scheme_end) == b':'); + self.serialization.push_str(base_url.slice(..scheme_end + 1)); + return self.after_double_slash(remaining, scheme_type, scheme_end) + } + let path_start = base_url.path_start; + debug_assert!(base_url.byte_at(path_start) == b'/'); + self.serialization.push_str(base_url.slice(..path_start + 1)); + let remaining = self.parse_path( + scheme_type, &mut true, path_start as usize, input_after_first_char); + self.with_query_and_fragment( + base_url.scheme_end, base_url.username_end, base_url.host_start, + base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining) + } + _ => { + let before_query = match (base_url.query_start, base_url.fragment_start) { + (None, None) => &*base_url.serialization, + (Some(i), _) | + (None, Some(i)) => base_url.slice(..i) + }; + self.serialization.push_str(before_query); + // FIXME spec says just "remove last entry", not the "pop" algorithm + self.pop_path(scheme_type, base_url.path_start as usize); + let remaining = self.parse_path( + scheme_type, &mut true, base_url.path_start as usize, input); + 
self.with_query_and_fragment( + base_url.scheme_end, base_url.username_end, base_url.host_start, + base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining) + } + } + } + + fn after_double_slash(mut self, input: Input, scheme_type: SchemeType, scheme_end: u32) + -> ParseResult<Url> { + self.serialization.push('/'); + self.serialization.push('/'); + // authority state + let (username_end, remaining) = try!(self.parse_userinfo(input, scheme_type)); + // host state + let host_start = try!(to_u32(self.serialization.len())); + let (host_end, host, port, remaining) = + try!(self.parse_host_and_port(remaining, scheme_end, scheme_type)); + // path state + let path_start = try!(to_u32(self.serialization.len())); + let remaining = self.parse_path_start( + scheme_type, &mut true, remaining); + self.with_query_and_fragment(scheme_end, username_end, host_start, + host_end, host, port, path_start, remaining) + } + + /// Return (username_end, remaining) + fn parse_userinfo<'i>(&mut self, mut input: Input<'i>, scheme_type: SchemeType) + -> ParseResult<(u32, Input<'i>)> { + let mut last_at = None; + let mut remaining = input.clone(); + let mut char_count = 0; + while let Some(c) = remaining.next() { + match c { + '@' => { + if last_at.is_some() { + self.syntax_violation("unencoded @ sign in username or password") + } else { + self.syntax_violation( + "embedding authentification information (username or password) \ + in an URL is not recommended") + } + last_at = Some((char_count, remaining.clone())) + }, + '/' | '?' 
| '#' => break, + '\\' if scheme_type.is_special() => break, + _ => (), + } + char_count += 1; + } + let (mut userinfo_char_count, remaining) = match last_at { + None => return Ok((try!(to_u32(self.serialization.len())), input)), + Some((0, remaining)) => return Ok((try!(to_u32(self.serialization.len())), remaining)), + Some(x) => x + }; + + let mut username_end = None; + while userinfo_char_count > 0 { + let (c, utf8_c) = input.next_utf8().unwrap(); + userinfo_char_count -= 1; + if c == ':' && username_end.is_none() { + // Start parsing password + username_end = Some(try!(to_u32(self.serialization.len()))); + self.serialization.push(':'); + } else { + self.check_url_code_point(c, &input); + self.serialization.extend(utf8_percent_encode(utf8_c, USERINFO_ENCODE_SET)); + } + } + let username_end = match username_end { + Some(i) => i, + None => try!(to_u32(self.serialization.len())), + }; + self.serialization.push('@'); + Ok((username_end, remaining)) + } + + fn parse_host_and_port<'i>(&mut self, input: Input<'i>, + scheme_end: u32, scheme_type: SchemeType) + -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)> { + let (host, remaining) = try!( + Parser::parse_host(input, scheme_type)); + write!(&mut self.serialization, "{}", host).unwrap(); + let host_end = try!(to_u32(self.serialization.len())); + let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') { + let scheme = || default_port(&self.serialization[..scheme_end as usize]); + try!(Parser::parse_port(remaining, scheme, self.context)) + } else { + (None, remaining) + }; + if let Some(port) = port { + write!(&mut self.serialization, ":{}", port).unwrap() + } + Ok((host_end, host.into(), port, remaining)) + } + + pub fn parse_host<'i>(mut input: Input<'i>, scheme_type: SchemeType) + -> ParseResult<(Host<String>, Input<'i>)> { + // Undo the Input abstraction here to avoid allocating in the common case + // where the host part of the input does not contain any tab or newline + let 
input_str = input.chars.as_str(); + let mut inside_square_brackets = false; + let mut has_ignored_chars = false; + let mut non_ignored_chars = 0; + let mut bytes = 0; + for c in input_str.chars() { + match c { + ':' if !inside_square_brackets => break, + '\\' if scheme_type.is_special() => break, + '/' | '?' | '#' => break, + '\t' | '\n' | '\r' => { + has_ignored_chars = true; + } + '[' => { + inside_square_brackets = true; + non_ignored_chars += 1 + } + ']' => { + inside_square_brackets = false; + non_ignored_chars += 1 + } + _ => non_ignored_chars += 1 + } + bytes += c.len_utf8(); + } + let replaced: String; + let host_str; + { + let host_input = input.by_ref().take(non_ignored_chars); + if has_ignored_chars { + replaced = host_input.collect(); + host_str = &*replaced + } else { + for _ in host_input {} + host_str = &input_str[..bytes] + } + } + if scheme_type.is_special() && host_str.is_empty() { + return Err(ParseError::EmptyHost) + } + let host = try!(Host::parse(host_str)); + Ok((host, input)) + } + + pub fn parse_file_host<'i>(&mut self, input: Input<'i>) + -> ParseResult<(bool, HostInternal, Input<'i>)> { + // Undo the Input abstraction here to avoid allocating in the common case + // where the host part of the input does not contain any tab or newline + let input_str = input.chars.as_str(); + let mut has_ignored_chars = false; + let mut non_ignored_chars = 0; + let mut bytes = 0; + for c in input_str.chars() { + match c { + '/' | '\\' | '?' 
| '#' => break, + '\t' | '\n' | '\r' => has_ignored_chars = true, + _ => non_ignored_chars += 1, + } + bytes += c.len_utf8(); + } + let replaced: String; + let host_str; + let mut remaining = input.clone(); + { + let host_input = remaining.by_ref().take(non_ignored_chars); + if has_ignored_chars { + replaced = host_input.collect(); + host_str = &*replaced + } else { + for _ in host_input {} + host_str = &input_str[..bytes] + } + } + if is_windows_drive_letter(host_str) { + return Ok((false, HostInternal::None, input)) + } + let host = if host_str.is_empty() { + HostInternal::None + } else { + match try!(Host::parse(host_str)) { + Host::Domain(ref d) if d == "localhost" => HostInternal::None, + host => { + write!(&mut self.serialization, "{}", host).unwrap(); + host.into() + } + } + }; + Ok((true, host, remaining)) + } + + pub fn parse_port<'i, P>(mut input: Input<'i>, default_port: P, + context: Context) + -> ParseResult<(Option<u16>, Input<'i>)> + where P: Fn() -> Option<u16> { + let mut port: u32 = 0; + let mut has_any_digit = false; + while let (Some(c), remaining) = input.split_first() { + if let Some(digit) = c.to_digit(10) { + port = port * 10 + digit; + if port > ::std::u16::MAX as u32 { + return Err(ParseError::InvalidPort) + } + has_any_digit = true; + } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' 
| '#') { + return Err(ParseError::InvalidPort) + } else { + break + } + input = remaining; + } + let mut opt_port = Some(port as u16); + if !has_any_digit || opt_port == default_port() { + opt_port = None; + } + return Ok((opt_port, input)) + } + + pub fn parse_path_start<'i>(&mut self, scheme_type: SchemeType, has_host: &mut bool, + mut input: Input<'i>) + -> Input<'i> { + // Path start state + match input.split_first() { + (Some('/'), remaining) => input = remaining, + (Some('\\'), remaining) => if scheme_type.is_special() { + self.syntax_violation("backslash"); + input = remaining + }, + _ => {} + } + let path_start = self.serialization.len(); + self.serialization.push('/'); + self.parse_path(scheme_type, has_host, path_start, input) + } + + pub fn parse_path<'i>(&mut self, scheme_type: SchemeType, has_host: &mut bool, + path_start: usize, mut input: Input<'i>) + -> Input<'i> { + // Relative path state + debug_assert!(self.serialization.ends_with("/")); + loop { + let segment_start = self.serialization.len(); + let mut ends_with_slash = false; + loop { + let input_before_c = input.clone(); + let (c, utf8_c) = if let Some(x) = input.next_utf8() { x } else { break }; + match c { + '/' if self.context != Context::PathSegmentSetter => { + ends_with_slash = true; + break + }, + '\\' if self.context != Context::PathSegmentSetter && + scheme_type.is_special() => { + self.syntax_violation("backslash"); + ends_with_slash = true; + break + }, + '?' 
| '#' if self.context == Context::UrlParser => { + input = input_before_c; + break + }, + _ => { + self.check_url_code_point(c, &input); + if c == '%' { + let after_percent_sign = input.clone(); + if matches!(input.next(), Some('2')) && + matches!(input.next(), Some('E') | Some('e')) { + self.serialization.push('.'); + continue + } + input = after_percent_sign + } + if self.context == Context::PathSegmentSetter { + self.serialization.extend(utf8_percent_encode( + utf8_c, PATH_SEGMENT_ENCODE_SET)); + } else { + self.serialization.extend(utf8_percent_encode( + utf8_c, DEFAULT_ENCODE_SET)); + } + } + } + } + match &self.serialization[segment_start..] { + ".." => { + debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/'); + self.serialization.truncate(segment_start - 1); // Truncate "/.." + self.pop_path(scheme_type, path_start); + if !self.serialization[path_start..].ends_with("/") { + self.serialization.push('/') + } + }, + "." => { + self.serialization.truncate(segment_start); + }, + _ => { + if scheme_type.is_file() && is_windows_drive_letter( + &self.serialization[path_start + 1..] + ) { + if self.serialization.ends_with('|') { + self.serialization.pop(); + self.serialization.push(':'); + } + if *has_host { + self.syntax_violation("file: with host and Windows drive letter"); + *has_host = false; // FIXME account for this in callers + } + } + if ends_with_slash { + self.serialization.push('/') + } + } + } + if !ends_with_slash { + break + } + } + input + } + + /// https://url.spec.whatwg.org/#pop-a-urls-path + fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) { + if self.serialization.len() > path_start { + let slash_position = self.serialization[path_start..].rfind('/').unwrap(); + // + 1 since rfind returns the position before the slash. 
+ let segment_start = path_start + slash_position + 1; + // Don’t pop a Windows drive letter + // FIXME: *normalized* Windows drive letter + if !( + scheme_type.is_file() && + is_windows_drive_letter(&self.serialization[segment_start..]) + ) { + self.serialization.truncate(segment_start); + } + } + + } + + pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> { + loop { + let input_before_c = input.clone(); + match input.next_utf8() { + Some(('?', _)) | Some(('#', _)) if self.context == Context::UrlParser => { + return input_before_c + } + Some((c, utf8_c)) => { + self.check_url_code_point(c, &input); + self.serialization.extend(utf8_percent_encode( + utf8_c, SIMPLE_ENCODE_SET)); + } + None => return input + } + } + } + + fn with_query_and_fragment(mut self, scheme_end: u32, username_end: u32, + host_start: u32, host_end: u32, host: HostInternal, + port: Option<u16>, path_start: u32, remaining: Input) + -> ParseResult<Url> { + let (query_start, fragment_start) = + try!(self.parse_query_and_fragment(scheme_end, remaining)); + Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: username_end, + host_start: host_start, + host_end: host_end, + host: host, + port: port, + path_start: path_start, + query_start: query_start, + fragment_start: fragment_start + }) + } + + /// Return (query_start, fragment_start) + fn parse_query_and_fragment(&mut self, scheme_end: u32, mut input: Input) + -> ParseResult<(Option<u32>, Option<u32>)> { + let mut query_start = None; + match input.next() { + Some('#') => {} + Some('?') => { + query_start = Some(try!(to_u32(self.serialization.len()))); + self.serialization.push('?'); + let remaining = self.parse_query(scheme_end, input); + if let Some(remaining) = remaining { + input = remaining + } else { + return Ok((query_start, None)) + } + } + None => return Ok((None, None)), + _ => panic!("Programming error. parse_query_and_fragment() called without ? 
or # {:?}") + } + + let fragment_start = try!(to_u32(self.serialization.len())); + self.serialization.push('#'); + self.parse_fragment(input); + Ok((query_start, Some(fragment_start))) + } + + pub fn parse_query<'i>(&mut self, scheme_end: u32, mut input: Input<'i>) + -> Option<Input<'i>> { + let mut query = String::new(); // FIXME: use a streaming decoder instead + let mut remaining = None; + while let Some(c) = input.next() { + if c == '#' && self.context == Context::UrlParser { + remaining = Some(input); + break + } else { + self.check_url_code_point(c, &input); + query.push(c); + } + } + + let encoding = match &self.serialization[..scheme_end as usize] { + "http" | "https" | "file" | "ftp" | "gopher" => self.query_encoding_override, + _ => EncodingOverride::utf8(), + }; + let query_bytes = encoding.encode(query.into()); + self.serialization.extend(percent_encode(&query_bytes, QUERY_ENCODE_SET)); + remaining + } + + fn fragment_only(mut self, base_url: &Url, mut input: Input) -> ParseResult<Url> { + let before_fragment = match base_url.fragment_start { + Some(i) => base_url.slice(..i), + None => &*base_url.serialization, + }; + debug_assert!(self.serialization.is_empty()); + self.serialization.reserve(before_fragment.len() + input.chars.as_str().len()); + self.serialization.push_str(before_fragment); + self.serialization.push('#'); + let next = input.next(); + debug_assert!(next == Some('#')); + self.parse_fragment(input); + Ok(Url { + serialization: self.serialization, + fragment_start: Some(try!(to_u32(before_fragment.len()))), + ..*base_url + }) + } + + pub fn parse_fragment(&mut self, mut input: Input) { + while let Some(c) = input.next() { + if c == '\0' { + self.syntax_violation("NULL characters are ignored in URL fragment identifiers") + } else { + self.check_url_code_point(c, &input); + self.serialization.push(c); // No percent-encoding here. 
+ } + } + } + + fn check_url_code_point(&self, c: char, input: &Input) { + if let Some(log) = self.log_syntax_violation { + if c == '%' { + let mut input = input.clone(); + if !matches!((input.next(), input.next()), (Some(a), Some(b)) + if is_ascii_hex_digit(a) && is_ascii_hex_digit(b)) { + log("expected 2 hex digits after %") + } + } else if !is_url_code_point(c) { + log("non-URL code point") + } + } + } +} + +#[inline] +fn is_ascii_hex_digit(c: char) -> bool { + matches!(c, 'a'...'f' | 'A'...'F' | '0'...'9') +} + +// Non URL code points: +// U+0000 to U+0020 (space) +// " # % < > [ \ ] ^ ` { | } +// U+007F to U+009F +// surrogates +// U+FDD0 to U+FDEF +// Last two of each plane: U+__FFFE to U+__FFFF for __ in 00 to 10 hex +#[inline] +fn is_url_code_point(c: char) -> bool { + matches!(c, + 'a'...'z' | + 'A'...'Z' | + '0'...'9' | + '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' | + '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~' | + '\u{A0}'...'\u{D7FF}' | '\u{E000}'...'\u{FDCF}' | '\u{FDF0}'...'\u{FFFD}' | + '\u{10000}'...'\u{1FFFD}' | '\u{20000}'...'\u{2FFFD}' | + '\u{30000}'...'\u{3FFFD}' | '\u{40000}'...'\u{4FFFD}' | + '\u{50000}'...'\u{5FFFD}' | '\u{60000}'...'\u{6FFFD}' | + '\u{70000}'...'\u{7FFFD}' | '\u{80000}'...'\u{8FFFD}' | + '\u{90000}'...'\u{9FFFD}' | '\u{A0000}'...'\u{AFFFD}' | + '\u{B0000}'...'\u{BFFFD}' | '\u{C0000}'...'\u{CFFFD}' | + '\u{D0000}'...'\u{DFFFD}' | '\u{E1000}'...'\u{EFFFD}' | + '\u{F0000}'...'\u{FFFFD}' | '\u{100000}'...'\u{10FFFD}') +} + +/// https://url.spec.whatwg.org/#c0-controls-and-space +#[inline] +fn c0_control_or_space(ch: char) -> bool { + ch <= ' ' // U+0000 to U+0020 +} + +/// https://url.spec.whatwg.org/#ascii-alpha +#[inline] +pub fn ascii_alpha(ch: char) -> bool { + matches!(ch, 'a'...'z' | 'A'...'Z') +} + +#[inline] +pub fn to_u32(i: usize) -> ParseResult<u32> { + if i <= ::std::u32::MAX as usize { + Ok(i as u32) + } else { + Err(ParseError::Overflow) + } +} + +/// Whether the scheme is file:, the path 
has a single segment, and that segment +/// is a Windows drive letter +fn is_windows_drive_letter(segment: &str) -> bool { + segment.len() == 2 + && starts_with_windows_drive_letter(segment) +} + +fn starts_with_windows_drive_letter(s: &str) -> bool { + ascii_alpha(s.as_bytes()[0] as char) + && matches!(s.as_bytes()[1], b':' | b'|') +} + +fn starts_with_windows_drive_letter_segment(input: &Input) -> bool { + let mut input = input.clone(); + matches!((input.next(), input.next(), input.next()), (Some(a), Some(b), Some(c)) + if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#')) +} diff --git a/third_party/rust/url/src/path_segments.rs b/third_party/rust/url/src/path_segments.rs new file mode 100644 index 000000000..437a84ee7 --- /dev/null +++ b/third_party/rust/url/src/path_segments.rs @@ -0,0 +1,187 @@ +// Copyright 2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use parser::{self, SchemeType, to_u32}; +use std::str; +use Url; + +/// Exposes methods to manipulate the path of an URL that is not cannot-be-base. +/// +/// The path always starts with a `/` slash, and is made of slash-separated segments. +/// There is always at least one segment (which may be the empty string). 
+/// +/// Examples: +/// +/// ```rust +/// # use url::Url; +/// let mut url = Url::parse("mailto:me@example.com").unwrap(); +/// assert!(url.path_segments_mut().is_err()); +/// +/// let mut url = Url::parse("http://example.net/foo/index.html").unwrap(); +/// url.path_segments_mut().unwrap().pop().push("img").push("2/100%.png"); +/// assert_eq!(url.as_str(), "http://example.net/foo/img/2%2F100%25.png"); +/// ``` +pub struct PathSegmentsMut<'a> { + url: &'a mut Url, + after_first_slash: usize, + after_path: String, + old_after_path_position: u32, +} + +// Not re-exported outside the crate +pub fn new(url: &mut Url) -> PathSegmentsMut { + let after_path = url.take_after_path(); + let old_after_path_position = to_u32(url.serialization.len()).unwrap(); + debug_assert!(url.byte_at(url.path_start) == b'/'); + PathSegmentsMut { + after_first_slash: url.path_start as usize + "/".len(), + url: url, + old_after_path_position: old_after_path_position, + after_path: after_path, + } +} + +impl<'a> Drop for PathSegmentsMut<'a> { + fn drop(&mut self) { + self.url.restore_after_path(self.old_after_path_position, &self.after_path) + } +} + +impl<'a> PathSegmentsMut<'a> { + /// Remove all segments in the path, leaving the minimal `url.path() == "/"`. + /// + /// Returns `&mut Self` so that method calls can be chained. + /// + /// Example: + /// + /// ```rust + /// # use url::Url; + /// let mut url = Url::parse("https://github.com/servo/rust-url/").unwrap(); + /// url.path_segments_mut().unwrap().clear().push("logout"); + /// assert_eq!(url.as_str(), "https://github.com/logout"); + /// ``` + pub fn clear(&mut self) -> &mut Self { + self.url.serialization.truncate(self.after_first_slash); + self + } + + /// Remove the last segment of this URL’s path if it is empty, + /// except if there was only one segment to begin with. + /// + /// In other words, remove one path trailing slash, if any, + /// unless it is also the initial slash (so this does nothing if `url.path() == "/"`). 
+ /// + /// Returns `&mut Self` so that method calls can be chained. + /// + /// Example: + /// + /// ```rust + /// # use url::Url; + /// let mut url = Url::parse("https://github.com/servo/rust-url/").unwrap(); + /// url.path_segments_mut().unwrap().push("pulls"); + /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url//pulls"); + /// + /// let mut url = Url::parse("https://github.com/servo/rust-url/").unwrap(); + /// url.path_segments_mut().unwrap().pop_if_empty().push("pulls"); + /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls"); + /// ``` + pub fn pop_if_empty(&mut self) -> &mut Self { + if self.url.serialization[self.after_first_slash..].ends_with('/') { + self.url.serialization.pop(); + } + self + } + + /// Remove the last segment of this URL’s path. + /// + /// If the path only has one segment, make it empty such that `url.path() == "/"`. + /// + /// Returns `&mut Self` so that method calls can be chained. + pub fn pop(&mut self) -> &mut Self { + let last_slash = self.url.serialization[self.after_first_slash..].rfind('/').unwrap_or(0); + self.url.serialization.truncate(self.after_first_slash + last_slash); + self + } + + /// Append the given segment at the end of this URL’s path. + /// + /// See the documentation for `.extend()`. + /// + /// Returns `&mut Self` so that method calls can be chained. + pub fn push(&mut self, segment: &str) -> &mut Self { + self.extend(Some(segment)) + } + + /// Append each segment from the given iterator at the end of this URL’s path. + /// + /// Each segment is percent-encoded like in `Url::parse` or `Url::join`, + /// except that `%` and `/` characters are also encoded (to `%25` and `%2F`). + /// (This is unlike `Url::parse` where `%` is left as-is in case some of the input + /// is already percent-encoded, and `/` denotes a path segment separator.) 
+ /// + /// Note that, in addition to slashes between new segments, + /// this always adds a slash between the existing path and the new segments + /// *except* if the existing path is `"/"`. + /// If the previous last segment was empty (if the path had a trailing slash) + /// the path after `.extend()` will contain two consecutive slashes. + /// If that is undesired, call `.pop_if_empty()` first. + /// + /// To obtain a behavior similar to `Url::join`, call `.pop()` unconditionally first. + /// + /// Returns `&mut Self` so that method calls can be chained. + /// + /// Example: + /// + /// ```rust + /// # use url::Url; + /// let mut url = Url::parse("https://github.com/").unwrap(); + /// let org = "servo"; + /// let repo = "rust-url"; + /// let issue_number = "188"; + /// url.path_segments_mut().unwrap().extend(&[org, repo, "issues", issue_number]); + /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/issues/188"); + /// ``` + /// + /// In order to make sure that parsing the serialization of an URL gives the same URL, + /// a segment is ignored if it is `"."` or `".."`: + /// + /// ```rust + /// # use url::Url; + /// let mut url = Url::parse("https://github.com/servo").unwrap(); + /// url.path_segments_mut().unwrap().extend(&["..", "rust-url", ".", "pulls"]); + /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls"); + /// ``` + pub fn extend<I>(&mut self, segments: I) -> &mut Self + where I: IntoIterator, I::Item: AsRef<str> { + let scheme_type = SchemeType::from(self.url.scheme()); + let path_start = self.url.path_start as usize; + self.url.mutate(|parser| { + parser.context = parser::Context::PathSegmentSetter; + for segment in segments { + let segment = segment.as_ref(); + if matches!(segment, "." | "..") { + continue + } + if parser.serialization.len() > path_start + 1 { + parser.serialization.push('/'); + } + let mut has_host = true; // FIXME account for this? 
+ parser.parse_path(scheme_type, &mut has_host, path_start, + parser::Input::new(segment)); + } + }); + self + } + + /// For internal testing, not part of the public API. + #[doc(hidden)] + pub fn assert_url_invariants(&mut self) -> &mut Self { + self.url.assert_invariants(); + self + } +} diff --git a/third_party/rust/url/src/percent_encoding.rs b/third_party/rust/url/src/percent_encoding.rs new file mode 100644 index 000000000..a4fb6177a --- /dev/null +++ b/third_party/rust/url/src/percent_encoding.rs @@ -0,0 +1,344 @@ +// Copyright 2013-2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use encoding; +use std::ascii::AsciiExt; +use std::borrow::Cow; +use std::fmt; +use std::slice; +use std::str; + +/// Represents a set of characters / bytes that should be percent-encoded. +/// +/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set). +/// +/// Different characters need to be encoded in different parts of an URL. +/// For example, a literal `?` question mark in an URL’s path would indicate +/// the start of the query string. +/// A question mark meant to be part of the path therefore needs to be percent-encoded. +/// In the query string however, a question mark does not have any special meaning +/// and does not need to be percent-encoded. +/// +/// A few sets are defined in this module. +/// Use the [`define_encode_set!`](../macro.define_encode_set!.html) macro to define different ones. +pub trait EncodeSet: Clone { + /// Called with UTF-8 bytes rather than code points. + /// Should return true for all non-ASCII bytes. 
+ fn contains(&self, byte: u8) -> bool; +} + +/// Define a new struct +/// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait, +/// for use in [`percent_encode()`](percent_encoding/fn.percent_encode.html) +/// and related functions. +/// +/// Parameters are characters to include in the set in addition to those of the base set. +/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set). +/// +/// Example +/// ======= +/// +/// ```rust +/// #[macro_use] extern crate url; +/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; +/// define_encode_set! { +/// /// This encode set is used in the URL parser for query strings. +/// pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'} +/// } +/// # fn main() { +/// assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::<String>(), "foo%20bar"); +/// # } +/// ``` +#[macro_export] +macro_rules! define_encode_set { + ($(#[$attr: meta])* pub $name: ident = [$base_set: expr] | {$($ch: pat),*}) => { + $(#[$attr])* + #[derive(Copy, Clone)] + #[allow(non_camel_case_types)] + pub struct $name; + + impl $crate::percent_encoding::EncodeSet for $name { + #[inline] + fn contains(&self, byte: u8) -> bool { + match byte as char { + $( + $ch => true, + )* + _ => $base_set.contains(byte) + } + } + } + } +} + +/// This encode set is used for the path of cannot-be-a-base URLs. +#[derive(Copy, Clone)] +#[allow(non_camel_case_types)] +pub struct SIMPLE_ENCODE_SET; + +impl EncodeSet for SIMPLE_ENCODE_SET { + #[inline] + fn contains(&self, byte: u8) -> bool { + byte < 0x20 || byte > 0x7E + } +} + +define_encode_set! { + /// This encode set is used in the URL parser for query strings. + pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'} +} + +define_encode_set! { + /// This encode set is used for path components. + pub DEFAULT_ENCODE_SET = [QUERY_ENCODE_SET] | {'`', '?', '{', '}'} +} + +define_encode_set! 
{ + /// This encode set is used for on '/'-separated path segment + pub PATH_SEGMENT_ENCODE_SET = [DEFAULT_ENCODE_SET] | {'%', '/'} +} + +define_encode_set! { + /// This encode set is used for username and password. + pub USERINFO_ENCODE_SET = [DEFAULT_ENCODE_SET] | { + '/', ':', ';', '=', '@', '[', '\\', ']', '^', '|' + } +} + +/// Return the percent-encoding of the given bytes. +/// +/// This is unconditional, unlike `percent_encode()` which uses an encode set. +pub fn percent_encode_byte(byte: u8) -> &'static str { + let index = usize::from(byte) * 3; + &"\ + %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F\ + %10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F\ + %20%21%22%23%24%25%26%27%28%29%2A%2B%2C%2D%2E%2F\ + %30%31%32%33%34%35%36%37%38%39%3A%3B%3C%3D%3E%3F\ + %40%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F\ + %50%51%52%53%54%55%56%57%58%59%5A%5B%5C%5D%5E%5F\ + %60%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F\ + %70%71%72%73%74%75%76%77%78%79%7A%7B%7C%7D%7E%7F\ + %80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F\ + %90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F\ + %A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF\ + %B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF\ + %C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF\ + %D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF\ + %E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF\ + %F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF\ + "[index..index + 3] +} + +/// Percent-encode the given bytes with the given encode set. +/// +/// The encode set define which bytes (in addition to non-ASCII and controls) +/// need to be percent-encoded. +/// The choice of this set depends on context. +/// For example, `?` needs to be encoded in an URL path but not in a query string. +/// +/// The return value is an iterator of `&str` slices (so it has a `.collect::<String>()` method) +/// that also implements `Display` and `Into<Cow<str>>`. 
+/// The latter returns `Cow::Borrowed` when none of the bytes in `input` +/// are in the given encode set. +#[inline] +pub fn percent_encode<E: EncodeSet>(input: &[u8], encode_set: E) -> PercentEncode<E> { + PercentEncode { + bytes: input, + encode_set: encode_set, + } +} + +/// Percent-encode the UTF-8 encoding of the given string. +/// +/// See `percent_encode()` for how to use the return value. +#[inline] +pub fn utf8_percent_encode<E: EncodeSet>(input: &str, encode_set: E) -> PercentEncode<E> { + percent_encode(input.as_bytes(), encode_set) +} + +/// The return type of `percent_encode()` and `utf8_percent_encode()`. +#[derive(Clone)] +pub struct PercentEncode<'a, E: EncodeSet> { + bytes: &'a [u8], + encode_set: E, +} + +impl<'a, E: EncodeSet> Iterator for PercentEncode<'a, E> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if let Some((&first_byte, remaining)) = self.bytes.split_first() { + if self.encode_set.contains(first_byte) { + self.bytes = remaining; + Some(percent_encode_byte(first_byte)) + } else { + assert!(first_byte.is_ascii()); + for (i, &byte) in remaining.iter().enumerate() { + if self.encode_set.contains(byte) { + // 1 for first_byte + i for previous iterations of this loop + let (unchanged_slice, remaining) = self.bytes.split_at(1 + i); + self.bytes = remaining; + return Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }) + } else { + assert!(byte.is_ascii()); + } + } + let unchanged_slice = self.bytes; + self.bytes = &[][..]; + Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }) + } + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + if self.bytes.is_empty() { + (0, Some(0)) + } else { + (1, Some(self.bytes.len())) + } + } +} + +impl<'a, E: EncodeSet> fmt::Display for PercentEncode<'a, E> { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + for c in (*self).clone() { + try!(formatter.write_str(c)) + } + Ok(()) + } +} + +impl<'a, E: EncodeSet> 
From<PercentEncode<'a, E>> for Cow<'a, str> { + fn from(mut iter: PercentEncode<'a, E>) -> Self { + match iter.next() { + None => "".into(), + Some(first) => { + match iter.next() { + None => first.into(), + Some(second) => { + let mut string = first.to_owned(); + string.push_str(second); + string.extend(iter); + string.into() + } + } + } + } + } +} + +/// Percent-decode the given bytes. +/// +/// The return value is an iterator of decoded `u8` bytes +/// that also implements `Into<Cow<u8>>` +/// (which returns `Cow::Borrowed` when `input` contains no percent-encoded sequence) +/// and has `decode_utf8()` and `decode_utf8_lossy()` methods. +#[inline] +pub fn percent_decode<'a>(input: &'a [u8]) -> PercentDecode<'a> { + PercentDecode { + bytes: input.iter() + } +} + +/// The return type of `percent_decode()`. +#[derive(Clone)] +pub struct PercentDecode<'a> { + bytes: slice::Iter<'a, u8>, +} + +fn after_percent_sign(iter: &mut slice::Iter<u8>) -> Option<u8> { + let initial_iter = iter.clone(); + let h = iter.next().and_then(|&b| (b as char).to_digit(16)); + let l = iter.next().and_then(|&b| (b as char).to_digit(16)); + if let (Some(h), Some(l)) = (h, l) { + Some(h as u8 * 0x10 + l as u8) + } else { + *iter = initial_iter; + None + } +} + +impl<'a> Iterator for PercentDecode<'a> { + type Item = u8; + + fn next(&mut self) -> Option<u8> { + self.bytes.next().map(|&byte| { + if byte == b'%' { + after_percent_sign(&mut self.bytes).unwrap_or(byte) + } else { + byte + } + }) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + let bytes = self.bytes.len(); + (bytes / 3, Some(bytes)) + } +} + +impl<'a> From<PercentDecode<'a>> for Cow<'a, [u8]> { + fn from(iter: PercentDecode<'a>) -> Self { + match iter.if_any() { + Some(vec) => Cow::Owned(vec), + None => Cow::Borrowed(iter.bytes.as_slice()), + } + } +} + +impl<'a> PercentDecode<'a> { + /// If the percent-decoding is different from the input, return it as a new bytes vector. 
+ pub fn if_any(&self) -> Option<Vec<u8>> { + let mut bytes_iter = self.bytes.clone(); + while bytes_iter.find(|&&b| b == b'%').is_some() { + if let Some(decoded_byte) = after_percent_sign(&mut bytes_iter) { + let initial_bytes = self.bytes.as_slice(); + let unchanged_bytes_len = initial_bytes.len() - bytes_iter.len() - 3; + let mut decoded = initial_bytes[..unchanged_bytes_len].to_owned(); + decoded.push(decoded_byte); + decoded.extend(PercentDecode { + bytes: bytes_iter + }); + return Some(decoded) + } + } + // Nothing to decode + None + } + + /// Decode the result of percent-decoding as UTF-8. + /// + /// This is return `Err` when the percent-decoded bytes are not well-formed in UTF-8. + pub fn decode_utf8(self) -> Result<Cow<'a, str>, str::Utf8Error> { + match self.clone().into() { + Cow::Borrowed(bytes) => { + match str::from_utf8(bytes) { + Ok(s) => Ok(s.into()), + Err(e) => Err(e), + } + } + Cow::Owned(bytes) => { + match String::from_utf8(bytes) { + Ok(s) => Ok(s.into()), + Err(e) => Err(e.utf8_error()), + } + } + } + } + + /// Decode the result of percent-decoding as UTF-8, lossily. + /// + /// Invalid UTF-8 percent-encoded byte sequences will be replaced � U+FFFD, + /// the replacement character. + pub fn decode_utf8_lossy(self) -> Cow<'a, str> { + encoding::decode_utf8_lossy(self.clone().into()) + } +} diff --git a/third_party/rust/url/src/quirks.rs b/third_party/rust/url/src/quirks.rs new file mode 100644 index 000000000..9a7537f47 --- /dev/null +++ b/third_party/rust/url/src/quirks.rs @@ -0,0 +1,217 @@ +// Copyright 2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Getters and setters for URL components implemented per https://url.spec.whatwg.org/#api +//! +//! 
Unless you need to be interoperable with web browsers, +//! you probably want to use `Url` method instead. + +use {Url, Position, Host, ParseError, idna}; +use parser::{Parser, SchemeType, default_port, Context, Input}; + +/// https://url.spec.whatwg.org/#dom-url-domaintoascii +pub fn domain_to_ascii(domain: &str) -> String { + match Host::parse(domain) { + Ok(Host::Domain(domain)) => domain, + _ => String::new(), + } +} + +/// https://url.spec.whatwg.org/#dom-url-domaintounicode +pub fn domain_to_unicode(domain: &str) -> String { + match Host::parse(domain) { + Ok(Host::Domain(ref domain)) => { + let (unicode, _errors) = idna::domain_to_unicode(domain); + unicode + } + _ => String::new(), + } +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-href +pub fn href(url: &Url) -> &str { + url.as_str() +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-href +pub fn set_href(url: &mut Url, value: &str) -> Result<(), ParseError> { + *url = try!(Url::parse(value)); + Ok(()) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-origin +pub fn origin(url: &Url) -> String { + url.origin().unicode_serialization() +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-protocol +#[inline] +pub fn protocol(url: &Url) -> &str { + &url.as_str()[..url.scheme().len() + ":".len()] +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-protocol +pub fn set_protocol(url: &mut Url, mut new_protocol: &str) -> Result<(), ()> { + // The scheme state in the spec ignores everything after the first `:`, + // but `set_scheme` errors if there is more. 
+ if let Some(position) = new_protocol.find(':') { + new_protocol = &new_protocol[..position]; + } + url.set_scheme(new_protocol) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-username +#[inline] +pub fn username(url: &Url) -> &str { + url.username() +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-username +pub fn set_username(url: &mut Url, new_username: &str) -> Result<(), ()> { + url.set_username(new_username) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-password +#[inline] +pub fn password(url: &Url) -> &str { + url.password().unwrap_or("") +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-password +pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> { + url.set_password(if new_password.is_empty() { None } else { Some(new_password) }) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-host +#[inline] +pub fn host(url: &Url) -> &str { + &url[Position::BeforeHost..Position::AfterPort] +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-host +pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { + if url.cannot_be_a_base() { + return Err(()) + } + let host; + let opt_port; + { + let scheme = url.scheme(); + let result = Parser::parse_host(Input::new(new_host), SchemeType::from(scheme)); + match result { + Ok((h, remaining)) => { + host = h; + opt_port = if let Some(remaining) = remaining.split_prefix(':') { + Parser::parse_port(remaining, || default_port(scheme), Context::Setter) + .ok().map(|(port, _remaining)| port) + } else { + None + }; + } + Err(_) => return Err(()) + } + } + url.set_host_internal(host, opt_port); + Ok(()) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-hostname +#[inline] +pub fn hostname(url: &Url) -> &str { + url.host_str().unwrap_or("") +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-hostname +pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { + if url.cannot_be_a_base() { + return Err(()) + } + let result 
= Parser::parse_host(Input::new(new_hostname), SchemeType::from(url.scheme())); + if let Ok((host, _remaining)) = result { + url.set_host_internal(host, None); + Ok(()) + } else { + Err(()) + } +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-port +#[inline] +pub fn port(url: &Url) -> &str { + &url[Position::BeforePort..Position::AfterPort] +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-port +pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> { + let result; + { + // has_host implies !cannot_be_a_base + let scheme = url.scheme(); + if !url.has_host() || scheme == "file" { + return Err(()) + } + result = Parser::parse_port(Input::new(new_port), || default_port(scheme), Context::Setter) + } + if let Ok((new_port, _remaining)) = result { + url.set_port_internal(new_port); + Ok(()) + } else { + Err(()) + } +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-pathname +#[inline] +pub fn pathname(url: &Url) -> &str { + url.path() +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-pathname +pub fn set_pathname(url: &mut Url, new_pathname: &str) { + if !url.cannot_be_a_base() { + url.set_path(new_pathname) + } +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-search +pub fn search(url: &Url) -> &str { + trim(&url[Position::AfterPath..Position::AfterQuery]) +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-search +pub fn set_search(url: &mut Url, new_search: &str) { + url.set_query(match new_search { + "" => None, + _ if new_search.starts_with('?') => Some(&new_search[1..]), + _ => Some(new_search), + }) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-hash +pub fn hash(url: &Url) -> &str { + trim(&url[Position::AfterQuery..]) +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-hash +pub fn set_hash(url: &mut Url, new_hash: &str) { + if url.scheme() != "javascript" { + url.set_fragment(match new_hash { + "" => None, + _ if new_hash.starts_with('#') => Some(&new_hash[1..]), + _ => Some(new_hash), + 
}) + } +} + +fn trim(s: &str) -> &str { + if s.len() == 1 { + "" + } else { + s + } +} diff --git a/third_party/rust/url/src/slicing.rs b/third_party/rust/url/src/slicing.rs new file mode 100644 index 000000000..926f3c796 --- /dev/null +++ b/third_party/rust/url/src/slicing.rs @@ -0,0 +1,182 @@ +// Copyright 2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::ops::{Range, RangeFrom, RangeTo, RangeFull, Index}; +use Url; + +impl Index<RangeFull> for Url { + type Output = str; + fn index(&self, _: RangeFull) -> &str { + &self.serialization + } +} + +impl Index<RangeFrom<Position>> for Url { + type Output = str; + fn index(&self, range: RangeFrom<Position>) -> &str { + &self.serialization[self.index(range.start)..] + } +} + +impl Index<RangeTo<Position>> for Url { + type Output = str; + fn index(&self, range: RangeTo<Position>) -> &str { + &self.serialization[..self.index(range.end)] + } +} + +impl Index<Range<Position>> for Url { + type Output = str; + fn index(&self, range: Range<Position>) -> &str { + &self.serialization[self.index(range.start)..self.index(range.end)] + } +} + +/// Indicates a position within a URL based on its components. 
///
/// A range of positions can be used for slicing `Url`:
///
/// ```rust
/// # use url::{Url, Position};
/// # fn something(some_url: Url) {
/// let serialization: &str = &some_url[..];
/// let serialization_without_fragment: &str = &some_url[..Position::AfterQuery];
/// let authority: &str = &some_url[Position::BeforeUsername..Position::AfterPort];
/// let data_url_payload: &str = &some_url[Position::BeforePath..Position::AfterQuery];
/// let scheme_relative: &str = &some_url[Position::BeforeUsername..];
/// # }
/// ```
///
/// In a pseudo-grammar (where `[`…`]?` makes a sub-sequence optional),
/// URL components and delimiters that separate them are:
///
/// ```notrust
/// url =
///    scheme ":"
///    [ "//" [ username [ ":" password ]? "@" ]? host [ ":" port ]? ]?
///    path [ "?" query ]? [ "#" fragment ]?
/// ```
///
/// When a given component is not present,
/// its "before" and "after" positions are the same
/// (so that `&some_url[BeforeFoo..AfterFoo]` is the empty string)
/// and component ordering is preserved
/// (so that a missing query "is between" a path and a fragment).
///
/// The end of a component and the start of the next are either the same or separated
/// by a delimiter.
/// (Note that the initial `/` of a path is considered part of the path here, not a delimiter.)
/// For example, `&url[..BeforeFragment]` would include a `#` delimiter (if present in `url`),
/// so `&url[..AfterQuery]` might be desired instead.
///
/// `BeforeScheme` and `AfterFragment` are always the start and end of the entire URL,
/// so `&url[BeforeScheme..X]` is the same as `&url[..X]`
/// and `&url[X..AfterFragment]` is the same as `&url[X..]`.
#[derive(Copy, Clone, Debug)]
pub enum Position {
    BeforeScheme,
    AfterScheme,
    BeforeUsername,
    AfterUsername,
    BeforePassword,
    AfterPassword,
    BeforeHost,
    AfterHost,
    BeforePort,
    AfterPort,
    BeforePath,
    AfterPath,
    BeforeQuery,
    AfterQuery,
    BeforeFragment,
    AfterFragment
}

impl Url {
    /// Translate a component-based `Position` into a byte offset into `self.serialization`.
    #[inline]
    fn index(&self, position: Position) -> usize {
        match position {
            Position::BeforeScheme => 0,

            Position::AfterScheme => self.scheme_end as usize,

            // With an authority the username comes after "://",
            // otherwise the position is just past the ":" that follows the scheme.
            Position::BeforeUsername => if self.has_authority() {
                self.scheme_end as usize + "://".len()
            } else {
                debug_assert!(self.byte_at(self.scheme_end) == b':');
                debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end);
                self.scheme_end as usize + ":".len()
            },

            Position::AfterUsername => self.username_end as usize,

            // A ":" right after the username means that a password follows it.
            Position::BeforePassword => if self.has_authority() &&
                    self.byte_at(self.username_end) == b':' {
                self.username_end as usize + ":".len()
            } else {
                debug_assert!(self.username_end == self.host_start);
                self.username_end as usize
            },

            // When there is a password, it ends just before the "@" that precedes the host.
            Position::AfterPassword => if self.has_authority() &&
                    self.byte_at(self.username_end) == b':' {
                debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@');
                self.host_start as usize - "@".len()
            } else {
                debug_assert!(self.username_end == self.host_start);
                self.host_start as usize
            },

            Position::BeforeHost => self.host_start as usize,

            Position::AfterHost => self.host_end as usize,

            // When there is a port, skip the ":" that separates it from the host.
            Position::BeforePort => if self.port.is_some() {
                debug_assert!(self.byte_at(self.host_end) == b':');
                self.host_end as usize + ":".len()
            } else {
                self.host_end as usize
            },

            // The end of the port and the start of the path are the same offset.
            Position::AfterPort => self.path_start as usize,

            Position::BeforePath => self.path_start as usize,

            // The path ends where the query starts, else where the fragment starts,
            // else at the end of the serialization.
            Position::AfterPath => match (self.query_start, self.fragment_start) {
                (Some(q), _) => q as usize,
                (None, Some(f)) => f as usize,
                (None, None) => self.serialization.len(),
            },

            // Same as `AfterPath`, except that the "?" delimiter is skipped
            // when there is a query.
            Position::BeforeQuery => match (self.query_start, self.fragment_start) {
                (Some(q), _) => {
                    debug_assert!(self.byte_at(q) == b'?');
                    q as usize + "?".len()
                }
                (None, Some(f)) => f as usize,
                (None, None) => self.serialization.len(),
            },

            Position::AfterQuery => match self.fragment_start {
                None => self.serialization.len(),
                Some(f) => f as usize,
            },

            // Skip the "#" delimiter when there is a fragment.
            Position::BeforeFragment => match self.fragment_start {
                Some(f) => {
                    debug_assert!(self.byte_at(f) == b'#');
                    f as usize + "#".len()
                }
                None => self.serialization.len(),
            },

            Position::AfterFragment => self.serialization.len(),
        }
    }
}