diff options
Diffstat (limited to 'third_party/rust/url/src/encoding.rs')
-rw-r--r-- | third_party/rust/url/src/encoding.rs | 135 |
1 files changed, 135 insertions, 0 deletions
diff --git a/third_party/rust/url/src/encoding.rs b/third_party/rust/url/src/encoding.rs new file mode 100644 index 000000000..0703c788f --- /dev/null +++ b/third_party/rust/url/src/encoding.rs @@ -0,0 +1,135 @@ +// Copyright 2013-2014 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + + +//! Abstraction that conditionally compiles either to rust-encoding, +//! or to only support UTF-8. + +#[cfg(feature = "query_encoding")] extern crate encoding; + +use std::borrow::Cow; + +#[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap}; +#[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label; +#[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef; + +#[cfg(feature = "query_encoding")] +#[derive(Copy, Clone)] +pub struct EncodingOverride { + /// `None` means UTF-8. + encoding: Option<EncodingRef> +} + +#[cfg(feature = "query_encoding")] +impl EncodingOverride { + pub fn from_opt_encoding(encoding: Option<EncodingRef>) -> Self { + encoding.map(Self::from_encoding).unwrap_or_else(Self::utf8) + } + + pub fn from_encoding(encoding: EncodingRef) -> Self { + EncodingOverride { + encoding: if encoding.name() == "utf-8" { None } else { Some(encoding) } + } + } + + #[inline] + pub fn utf8() -> Self { + EncodingOverride { encoding: None } + } + + pub fn lookup(label: &[u8]) -> Option<Self> { + // Don't use String::from_utf8_lossy since no encoding label contains U+FFFD + // https://encoding.spec.whatwg.org/#names-and-labels + ::std::str::from_utf8(label) + .ok() + .and_then(encoding_from_whatwg_label) + .map(Self::from_encoding) + } + + /// https://encoding.spec.whatwg.org/#get-an-output-encoding + pub fn to_output_encoding(self) -> Self { + if let Some(encoding) = self.encoding { + if matches!(encoding.name(), "utf-16le" | "utf-16be") { + return Self::utf8() + } + } + self + } + + pub fn is_utf8(&self) -> bool { + self.encoding.is_none() + } + + pub fn name(&self) -> &'static str { + match self.encoding { + Some(encoding) => encoding.name(), + None => "utf-8", + } + } + + pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> { + match self.encoding { + // `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace` + Some(encoding) => encoding.decode(&input, DecoderTrap::Replace).unwrap().into(), + None => decode_utf8_lossy(input), + } + } + + pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> { + match self.encoding { + // `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape` + Some(encoding) => Cow::Owned(encoding.encode(&input, EncoderTrap::NcrEscape).unwrap()), + None => encode_utf8(input) + } + } +} + + +#[cfg(not(feature = "query_encoding"))] +#[derive(Copy, Clone)] +pub struct EncodingOverride; + +#[cfg(not(feature = "query_encoding"))] +impl EncodingOverride { + #[inline] + pub fn utf8() -> Self { + EncodingOverride + } + + pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> { + decode_utf8_lossy(input) + } + + pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> { + encode_utf8(input) + } +} + +pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> { + match input { + Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), + Cow::Owned(bytes) => { + let raw_utf8: *const [u8]; + match String::from_utf8_lossy(&bytes) { + Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(), + Cow::Owned(s) => return s.into(), + } + // from_utf8_lossy returned a borrow of `bytes` unchanged. + debug_assert!(raw_utf8 == &*bytes as *const [u8]); + // Reuse the existing `Vec` allocation. + unsafe { String::from_utf8_unchecked(bytes) }.into() + } + } +} + +pub fn encode_utf8(input: Cow<str>) -> Cow<[u8]> { + match input { + Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), + Cow::Owned(s) => Cow::Owned(s.into_bytes()) + } +} |