summaryrefslogtreecommitdiffstats
path: root/third_party/rust/idna/src/uts46.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/idna/src/uts46.rs')
-rw-r--r--third_party/rust/idna/src/uts46.rs322
1 files changed, 0 insertions, 322 deletions
diff --git a/third_party/rust/idna/src/uts46.rs b/third_party/rust/idna/src/uts46.rs
deleted file mode 100644
index 5f230e0e4..000000000
--- a/third_party/rust/idna/src/uts46.rs
+++ /dev/null
@@ -1,322 +0,0 @@
-// Copyright 2013-2014 Valentin Gosu.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-//! [*Unicode IDNA Compatibility Processing*
-//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
-
-use self::Mapping::*;
-use punycode;
-use std::ascii::AsciiExt;
-use unicode_normalization::UnicodeNormalization;
-use unicode_normalization::char::is_combining_mark;
-use unicode_bidi::{BidiClass, bidi_class};
-
-include!("uts46_mapping_table.rs");
-
-#[derive(Debug)]
-enum Mapping {
- Valid,
- Ignored,
- Mapped(&'static str),
- Deviation(&'static str),
- Disallowed,
- DisallowedStd3Valid,
- DisallowedStd3Mapped(&'static str),
-}
-
-struct Range {
- from: char,
- to: char,
- mapping: Mapping,
-}
-
-fn find_char(codepoint: char) -> &'static Mapping {
- let mut min = 0;
- let mut max = TABLE.len() - 1;
- while max > min {
- let mid = (min + max) >> 1;
- if codepoint > TABLE[mid].to {
- min = mid;
- } else if codepoint < TABLE[mid].from {
- max = mid;
- } else {
- min = mid;
- max = mid;
- }
- }
- &TABLE[min].mapping
-}
-
-fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec<Error>) {
- match *find_char(codepoint) {
- Mapping::Valid => output.push(codepoint),
- Mapping::Ignored => {},
- Mapping::Mapped(mapping) => output.push_str(mapping),
- Mapping::Deviation(mapping) => {
- if flags.transitional_processing {
- output.push_str(mapping)
- } else {
- output.push(codepoint)
- }
- }
- Mapping::Disallowed => {
- errors.push(Error::DissallowedCharacter);
- output.push(codepoint);
- }
- Mapping::DisallowedStd3Valid => {
- if flags.use_std3_ascii_rules {
- errors.push(Error::DissallowedByStd3AsciiRules);
- }
- output.push(codepoint)
- }
- Mapping::DisallowedStd3Mapped(mapping) => {
- if flags.use_std3_ascii_rules {
- errors.push(Error::DissallowedMappedInStd3);
- }
- output.push_str(mapping)
- }
- }
-}
-
-// http://tools.ietf.org/html/rfc5893#section-2
-fn passes_bidi(label: &str, transitional_processing: bool) -> bool {
- let mut chars = label.chars();
- let class = match chars.next() {
- Some(c) => bidi_class(c),
- None => return true, // empty string
- };
-
- if class == BidiClass::L
- || (class == BidiClass::ON && transitional_processing) // starts with \u200D
- || (class == BidiClass::ES && transitional_processing) // hack: 1.35.+33.49
- || class == BidiClass::EN // hack: starts with number 0à.\u05D0
- { // LTR
- // Rule 5
- loop {
- match chars.next() {
- Some(c) => {
- let c = bidi_class(c);
- if !matches!(c, BidiClass::L | BidiClass::EN |
- BidiClass::ES | BidiClass::CS |
- BidiClass::ET | BidiClass::ON |
- BidiClass::BN | BidiClass::NSM) {
- return false;
- }
- },
- None => { break; },
- }
- }
-
- // Rule 6
- let mut rev_chars = label.chars().rev();
- let mut last = rev_chars.next();
- loop { // must end in L or EN followed by 0 or more NSM
- match last {
- Some(c) if bidi_class(c) == BidiClass::NSM => {
- last = rev_chars.next();
- continue;
- }
- _ => { break; },
- }
- }
-
- // TODO: does not pass for àˇ.\u05D0
- // match last {
- // Some(c) if bidi_class(c) == BidiClass::L
- // || bidi_class(c) == BidiClass::EN => {},
- // Some(c) => { return false; },
- // _ => {}
- // }
-
- } else if class == BidiClass::R || class == BidiClass::AL { // RTL
- let mut found_en = false;
- let mut found_an = false;
-
- // Rule 2
- loop {
- match chars.next() {
- Some(c) => {
- let char_class = bidi_class(c);
-
- if char_class == BidiClass::EN {
- found_en = true;
- }
- if char_class == BidiClass::AN {
- found_an = true;
- }
-
- if !matches!(char_class, BidiClass::R | BidiClass::AL |
- BidiClass::AN | BidiClass::EN |
- BidiClass::ES | BidiClass::CS |
- BidiClass::ET | BidiClass::ON |
- BidiClass::BN | BidiClass::NSM) {
- return false;
- }
- },
- None => { break; },
- }
- }
- // Rule 3
- let mut rev_chars = label.chars().rev();
- let mut last = rev_chars.next();
- loop { // must end in L or EN followed by 0 or more NSM
- match last {
- Some(c) if bidi_class(c) == BidiClass::NSM => {
- last = rev_chars.next();
- continue;
- }
- _ => { break; },
- }
- }
- match last {
- Some(c) if matches!(bidi_class(c), BidiClass::R | BidiClass::AL |
- BidiClass::EN | BidiClass::AN) => {},
- _ => { return false; }
- }
-
- // Rule 4
- if found_an && found_en {
- return false;
- }
- } else {
- // Rule 2: Should start with L or R/AL
- return false;
- }
-
- return true;
-}
-
-/// http://www.unicode.org/reports/tr46/#Validity_Criteria
-fn validate(label: &str, flags: Flags, errors: &mut Vec<Error>) {
- if label.nfc().ne(label.chars()) {
- errors.push(Error::ValidityCriteria);
- }
-
- // Can not contain '.' since the input is from .split('.')
- if {
- let mut chars = label.chars().skip(2);
- let third = chars.next();
- let fourth = chars.next();
- (third, fourth) == (Some('-'), Some('-'))
- } || label.starts_with("-")
- || label.ends_with("-")
- || label.chars().next().map_or(false, is_combining_mark)
- || label.chars().any(|c| match *find_char(c) {
- Mapping::Valid => false,
- Mapping::Deviation(_) => flags.transitional_processing,
- Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules,
- _ => true,
- })
- || !passes_bidi(label, flags.transitional_processing)
- {
- errors.push(Error::ValidityCriteria)
- }
-}
-
-/// http://www.unicode.org/reports/tr46/#Processing
-fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
- let mut mapped = String::new();
- for c in domain.chars() {
- map_char(c, flags, &mut mapped, errors)
- }
- let normalized: String = mapped.nfc().collect();
- let mut validated = String::new();
- for label in normalized.split('.') {
- if validated.len() > 0 {
- validated.push('.');
- }
- if label.starts_with("xn--") {
- match punycode::decode_to_string(&label["xn--".len()..]) {
- Some(decoded_label) => {
- let flags = Flags { transitional_processing: false, ..flags };
- validate(&decoded_label, flags, errors);
- validated.push_str(&decoded_label)
- }
- None => errors.push(Error::PunycodeError)
- }
- } else {
- validate(label, flags, errors);
- validated.push_str(label)
- }
- }
- validated
-}
-
-#[derive(Copy, Clone)]
-pub struct Flags {
- pub use_std3_ascii_rules: bool,
- pub transitional_processing: bool,
- pub verify_dns_length: bool,
-}
-
-#[derive(PartialEq, Eq, Clone, Copy, Debug)]
-enum Error {
- PunycodeError,
- ValidityCriteria,
- DissallowedByStd3AsciiRules,
- DissallowedMappedInStd3,
- DissallowedCharacter,
- TooLongForDns,
-}
-
-/// Errors recorded during UTS #46 processing.
-///
-/// This is opaque for now, only indicating the precense of at least one error.
-/// More details may be exposed in the future.
-#[derive(Debug)]
-pub struct Errors(Vec<Error>);
-
-/// http://www.unicode.org/reports/tr46/#ToASCII
-pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
- let mut errors = Vec::new();
- let mut result = String::new();
- for label in processing(domain, flags, &mut errors).split('.') {
- if result.len() > 0 {
- result.push('.');
- }
- if label.is_ascii() {
- result.push_str(label);
- } else {
- match punycode::encode_str(label) {
- Some(x) => {
- result.push_str("xn--");
- result.push_str(&x);
- },
- None => errors.push(Error::PunycodeError)
- }
- }
- }
-
- if flags.verify_dns_length {
- let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result };
- if domain.len() < 1 || domain.len() > 253 ||
- domain.split('.').any(|label| label.len() < 1 || label.len() > 63) {
- errors.push(Error::TooLongForDns)
- }
- }
- if errors.is_empty() {
- Ok(result)
- } else {
- Err(Errors(errors))
- }
-}
-
-/// http://www.unicode.org/reports/tr46/#ToUnicode
-///
-/// Only `use_std3_ascii_rules` is used in `flags`.
-pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) {
- flags.transitional_processing = false;
- let mut errors = Vec::new();
- let domain = processing(domain, flags, &mut errors);
- let errors = if errors.is_empty() {
- Ok(())
- } else {
- Err(Errors(errors))
- };
- (domain, errors)
-}