summaryrefslogtreecommitdiffstats
path: root/third_party/rust/idna/tests/uts46.rs
blob: 038fdf450c08792f6de4f08f65c2a404ca7acceb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// Copyright 2013-2014 Valentin Gosu.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::char;
use idna::uts46;
use test::TestFn;

/// Builds one dynamic test for every data row of `IdnaTest.txt` and hands it
/// to `add_test` together with a name of the form `UTS #46 line N`, where `N`
/// is the 1-based line number in the data file.
///
/// Blank lines and lines starting with `#` are ignored, a trailing `#` comment
/// is stripped from each remaining line, and rows carrying a leading `XFAIL`
/// marker are parsed but never registered.
///
/// # Panics
///
/// Panics while collecting if a row has fewer than four `;`-separated fields.
pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
    // http://www.unicode.org/Public/idna/latest/IdnaTest.txt
    let data = include_str!("IdnaTest.txt");

    for (index, raw_line) in data.lines().enumerate() {
        // Nothing to test on blank lines or whole-line comments.
        if raw_line.is_empty() || raw_line.starts_with('#') {
            continue;
        }

        // Cut off a trailing comment, if there is one.
        let uncommented = match raw_line.find('#') {
            Some(hash_pos) => &raw_line[..hash_pos],
            None => raw_line,
        };

        // A leading "XFAIL" (5 bytes) marks a row that is known to fail.
        let expected_failure = uncommented.starts_with("XFAIL");
        let row = if expected_failure {
            &uncommented[5..]
        } else {
            uncommented
        };

        // Fields, in order: type; source; toUnicode; toASCII; optional NV8.
        let mut fields = row.split(';').map(str::trim);
        let test_type = fields.next().unwrap();
        let original = fields.next().unwrap();
        let source = unescape(original);
        let to_unicode = fields.next().unwrap();
        let to_ascii = fields.next().unwrap();
        let nv8 = fields.next().unwrap_or("");

        // The row is fully parsed before the skip so malformed XFAIL rows are
        // still noticed at collection time.
        if expected_failure {
            continue;
        }

        let test_name = format!("UTS #46 line {}", index + 1);
        let test_body = move || {
            let flags = uts46::Flags {
                use_std3_ascii_rules: true,
                transitional_processing: test_type == "T",
                verify_dns_length: true,
            };
            let result = uts46::to_ascii(&source, flags);

            // A bracketed toASCII column lists error codes instead of a value.
            if to_ascii.starts_with('[') {
                if to_ascii.starts_with("[C") {
                    // http://unicode.org/reports/tr46/#Deviations
                    // applications that perform IDNA2008 lookup are not required to check
                    // for these contexts
                    return;
                }
                if let Ok(unexpected) = result {
                    panic!("Expected error. result: {} | original: {} | source: {}",
                           unexpected, original, source);
                }
                return;
            }

            if nv8 == "NV8" {
                // This result isn't valid under IDNA2008. Skip it
                return;
            }

            // An empty toASCII column falls back to toUnicode, and an empty
            // toUnicode column falls back to the unescaped source.
            let expected = if !to_ascii.is_empty() {
                to_ascii.to_string()
            } else if !to_unicode.is_empty() {
                to_unicode.to_string()
            } else {
                source.clone()
            };

            assert!(result.is_ok(), "Couldn't parse {} | original: {} | error: {:?}",
                    source, original, result.err());
            let output = result.unwrap();
            assert!(output == expected, "result: {} | expected: {} | original: {} | source: {}",
                    output, expected, original, source);
        };
        add_test(test_name, TestFn::dyn_test_fn(test_body))
    }
}

/// Expands the backslash escapes used in the test data.
///
/// `\\` becomes a single backslash and `\uXXXX` (exactly four hex digits)
/// becomes the corresponding character. When the four digits do not form a
/// valid `char`, the escape is written back out literally with upper-case hex
/// digits. Every other character is copied through unchanged.
///
/// # Panics
///
/// Panics on a trailing backslash, on a `\u` escape with missing or non-hex
/// digits, and on any escape other than `\\` or `\u`.
fn unescape(input: &str) -> String {
    let mut decoded = String::new();
    let mut rest = input.chars();

    while let Some(current) = rest.next() {
        // Ordinary characters are copied verbatim.
        if current != '\\' {
            decoded.push(current);
            continue;
        }

        match rest.next().unwrap() {
            '\\' => decoded.push('\\'),
            'u' => {
                // Read the four hex digits one at a time, left to right.
                let mut digits = [0u32; 4];
                for slot in digits.iter_mut() {
                    *slot = rest.next().unwrap().to_digit(16).unwrap();
                }
                let code_point = digits.iter().fold(0u32, |acc, &digit| acc * 16 + digit);

                match char::from_u32(code_point) {
                    Some(ch) => decoded.push(ch),
                    None => {
                        let literal = format!("\\u{:X}{:X}{:X}{:X}",
                                              digits[0], digits[1], digits[2], digits[3]);
                        decoded.push_str(&literal);
                    }
                }
            }
            _ => panic!("Invalid test data input"),
        }
    }

    decoded
}