diff options
Diffstat (limited to 'third_party/rust/idna/src/uts46.rs')
-rw-r--r-- | third_party/rust/idna/src/uts46.rs | 322 |
1 files changed, 0 insertions, 322 deletions
diff --git a/third_party/rust/idna/src/uts46.rs b/third_party/rust/idna/src/uts46.rs deleted file mode 100644 index 5f230e0e4..000000000 --- a/third_party/rust/idna/src/uts46.rs +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2013-2014 Valentin Gosu. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! [*Unicode IDNA Compatibility Processing* -//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/) - -use self::Mapping::*; -use punycode; -use std::ascii::AsciiExt; -use unicode_normalization::UnicodeNormalization; -use unicode_normalization::char::is_combining_mark; -use unicode_bidi::{BidiClass, bidi_class}; - -include!("uts46_mapping_table.rs"); - -#[derive(Debug)] -enum Mapping { - Valid, - Ignored, - Mapped(&'static str), - Deviation(&'static str), - Disallowed, - DisallowedStd3Valid, - DisallowedStd3Mapped(&'static str), -} - -struct Range { - from: char, - to: char, - mapping: Mapping, -} - -fn find_char(codepoint: char) -> &'static Mapping { - let mut min = 0; - let mut max = TABLE.len() - 1; - while max > min { - let mid = (min + max) >> 1; - if codepoint > TABLE[mid].to { - min = mid; - } else if codepoint < TABLE[mid].from { - max = mid; - } else { - min = mid; - max = mid; - } - } - &TABLE[min].mapping -} - -fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec<Error>) { - match *find_char(codepoint) { - Mapping::Valid => output.push(codepoint), - Mapping::Ignored => {}, - Mapping::Mapped(mapping) => output.push_str(mapping), - Mapping::Deviation(mapping) => { - if flags.transitional_processing { - output.push_str(mapping) - } else { - output.push(codepoint) - } - } - Mapping::Disallowed => { - errors.push(Error::DissallowedCharacter); - output.push(codepoint); - } - Mapping::DisallowedStd3Valid => { - if flags.use_std3_ascii_rules { - errors.push(Error::DissallowedByStd3AsciiRules); - } - output.push(codepoint) - } - Mapping::DisallowedStd3Mapped(mapping) => { - if flags.use_std3_ascii_rules { - errors.push(Error::DissallowedMappedInStd3); - } - output.push_str(mapping) - } - } -} - -// http://tools.ietf.org/html/rfc5893#section-2 -fn passes_bidi(label: &str, transitional_processing: bool) -> bool { - let mut chars = label.chars(); - let class = match chars.next() { - Some(c) => bidi_class(c), - None => return true, // empty string - }; - - if class == BidiClass::L - || (class == BidiClass::ON && transitional_processing) // starts with \u200D - || (class == BidiClass::ES && transitional_processing) // hack: 1.35.+33.49 - || class == BidiClass::EN // hack: starts with number 0à.\u05D0 - { // LTR - // Rule 5 - loop { - match chars.next() { - Some(c) => { - let c = bidi_class(c); - if !matches!(c, BidiClass::L | BidiClass::EN | - BidiClass::ES | BidiClass::CS | - BidiClass::ET | BidiClass::ON | - BidiClass::BN | BidiClass::NSM) { - return false; - } - }, - None => { break; }, - } - } - - // Rule 6 - let mut rev_chars = label.chars().rev(); - let mut last = rev_chars.next(); - loop { // must end in L or EN followed by 0 or more NSM - match last { - Some(c) if bidi_class(c) == BidiClass::NSM => { - last = rev_chars.next(); - continue; - } - _ => { break; }, - } - } - - // TODO: does not pass for àˇ.\u05D0 - // match last { - // Some(c) if bidi_class(c) == BidiClass::L - // || bidi_class(c) == BidiClass::EN => {}, - // Some(c) => { return false; }, - // _ => {} - // } - - } else if class == BidiClass::R || class == BidiClass::AL { // RTL - let mut found_en = false; - let mut found_an = false; - - // Rule 2 - loop { - match chars.next() { - Some(c) => { - let char_class = bidi_class(c); - - if char_class == BidiClass::EN { - found_en = true; - } - if char_class == BidiClass::AN { - found_an = true; - } - - if !matches!(char_class, BidiClass::R | BidiClass::AL | - BidiClass::AN | BidiClass::EN | - BidiClass::ES | BidiClass::CS | - BidiClass::ET | BidiClass::ON | - BidiClass::BN | BidiClass::NSM) { - return false; - } - }, - None => { break; }, - } - } - // Rule 3 - let mut rev_chars = label.chars().rev(); - let mut last = rev_chars.next(); - loop { // must end in L or EN followed by 0 or more NSM - match last { - Some(c) if bidi_class(c) == BidiClass::NSM => { - last = rev_chars.next(); - continue; - } - _ => { break; }, - } - } - match last { - Some(c) if matches!(bidi_class(c), BidiClass::R | BidiClass::AL | - BidiClass::EN | BidiClass::AN) => {}, - _ => { return false; } - } - - // Rule 4 - if found_an && found_en { - return false; - } - } else { - // Rule 2: Should start with L or R/AL - return false; - } - - return true; -} - -/// http://www.unicode.org/reports/tr46/#Validity_Criteria -fn validate(label: &str, flags: Flags, errors: &mut Vec<Error>) { - if label.nfc().ne(label.chars()) { - errors.push(Error::ValidityCriteria); - } - - // Can not contain '.' since the input is from .split('.') - if { - let mut chars = label.chars().skip(2); - let third = chars.next(); - let fourth = chars.next(); - (third, fourth) == (Some('-'), Some('-')) - } || label.starts_with("-") - || label.ends_with("-") - || label.chars().next().map_or(false, is_combining_mark) - || label.chars().any(|c| match *find_char(c) { - Mapping::Valid => false, - Mapping::Deviation(_) => flags.transitional_processing, - Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules, - _ => true, - }) - || !passes_bidi(label, flags.transitional_processing) - { - errors.push(Error::ValidityCriteria) - } -} - -/// http://www.unicode.org/reports/tr46/#Processing -fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String { - let mut mapped = String::new(); - for c in domain.chars() { - map_char(c, flags, &mut mapped, errors) - } - let normalized: String = mapped.nfc().collect(); - let mut validated = String::new(); - for label in normalized.split('.') { - if validated.len() > 0 { - validated.push('.'); - } - if label.starts_with("xn--") { - match punycode::decode_to_string(&label["xn--".len()..]) { - Some(decoded_label) => { - let flags = Flags { transitional_processing: false, ..flags }; - validate(&decoded_label, flags, errors); - validated.push_str(&decoded_label) - } - None => errors.push(Error::PunycodeError) - } - } else { - validate(label, flags, errors); - validated.push_str(label) - } - } - validated -} - -#[derive(Copy, Clone)] -pub struct Flags { - pub use_std3_ascii_rules: bool, - pub transitional_processing: bool, - pub verify_dns_length: bool, -} - -#[derive(PartialEq, Eq, Clone, Copy, Debug)] -enum Error { - PunycodeError, - ValidityCriteria, - DissallowedByStd3AsciiRules, - DissallowedMappedInStd3, - DissallowedCharacter, - TooLongForDns, -} - -/// Errors recorded during UTS #46 processing. -/// -/// This is opaque for now, only indicating the precense of at least one error. -/// More details may be exposed in the future. -#[derive(Debug)] -pub struct Errors(Vec<Error>); - -/// http://www.unicode.org/reports/tr46/#ToASCII -pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> { - let mut errors = Vec::new(); - let mut result = String::new(); - for label in processing(domain, flags, &mut errors).split('.') { - if result.len() > 0 { - result.push('.'); - } - if label.is_ascii() { - result.push_str(label); - } else { - match punycode::encode_str(label) { - Some(x) => { - result.push_str("xn--"); - result.push_str(&x); - }, - None => errors.push(Error::PunycodeError) - } - } - } - - if flags.verify_dns_length { - let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; - if domain.len() < 1 || domain.len() > 253 || - domain.split('.').any(|label| label.len() < 1 || label.len() > 63) { - errors.push(Error::TooLongForDns) - } - } - if errors.is_empty() { - Ok(result) - } else { - Err(Errors(errors)) - } -} - -/// http://www.unicode.org/reports/tr46/#ToUnicode -/// -/// Only `use_std3_ascii_rules` is used in `flags`. -pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) { - flags.transitional_processing = false; - let mut errors = Vec::new(); - let domain = processing(domain, flags, &mut errors); - let errors = if errors.is_empty() { - Ok(()) - } else { - Err(Errors(errors)) - }; - (domain, errors) -} |