# Copyright 2013-2014 Valentin Gosu.
#
# Licensed under the Apache License, Version 2.0 or the MIT license
# , at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# Run as: python make_uts46_mapping_table.py IdnaMappingTable.txt > uts46_mapping_table.rs
# You can get the latest idna table from
# http://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt

"""Generate a Rust ``static TABLE`` of ``Range`` entries from IdnaMappingTable.txt.

The table path is taken from the first command-line argument and falls back
to ``IdnaMappingTable.txt`` in the current directory.  The generated Rust
source is written to standard output as UTF-8.
"""

import io
import struct
import sys

# Input file used when no path is given on the command line.
DEFAULT_TABLE_PATH = "IdnaMappingTable.txt"

# Text emitted before the first table entry.  It ends with a newline and the
# writer appends another one, so a blank line follows the opening bracket.
HEADER = '''\
// Copyright 2013-2014 Valentin Gosu.
//
// Licensed under the Apache License, Version 2.0 or the MIT license
// , at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Generated by make_idna_table.py

static TABLE: &'static [Range] = &[
'''

# Text emitted after the last table entry.
FOOTER = "];"

try:
    _unichr = unichr  # Python 2
except NameError:
    _unichr = chr  # Python 3


def _code_point(value):
    """Return the text string holding the single code point ``value``."""
    try:
        return _unichr(value)
    except ValueError:
        # Narrow Python 2 builds reject values above U+FFFF in unichr();
        # decoding UTF-32 yields the equivalent string there.  For a value
        # that is not a code point at all this raises a ValueError subclass.
        return struct.pack('>I', value).decode('utf-32-be')


def char(s):
    """Return the character for hex code point ``s``, escaped for Rust.

    Backslash, double quote and NUL are replaced by ``\\\\``, ``\\"`` and
    ``\\0`` respectively; every other character is returned unchanged.
    """
    return (_code_point(int(s, 16))
            .replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\0', '\\0'))


def format_entry(line):
    """Convert one line of the mapping table into a Rust ``Range`` entry.

    Returns ``None`` for lines that produce no entry: lines that are blank
    once the ``#`` comment is removed, and the ``D800..DFFF`` range.
    """
    # remove comments
    data = line.partition('#')[0]
    # skip empty lines
    if not data.strip():
        return None
    fields = data.split(';')
    code_points = fields[0].strip()
    if code_points == 'D800..DFFF':
        return None  # Surrogates don't occur in Rust strings.
    first, _, last = code_points.partition('..')
    if not last:
        # A single code point is a range of length one.
        last = first
    # e.g. "disallowed_STD3_valid" -> "DisallowedStd3Valid"
    mapping = fields[1].strip().replace('_', ' ').title().replace(' ', '')
    if len(fields) > 2:
        target = fields[2].strip()
        if target:
            mapping += '("%s")' % ''.join(char(c) for c in target.split())
        elif mapping == "Deviation":
            # A deviation with no replacement maps to the empty string.
            mapping += '("")'
    return (" Range { from: '%s', to: '%s', mapping: %s },"
            % (char(first), char(last), mapping))


def generate(lines):
    """Yield the output lines (without trailing newline) for ``lines``."""
    yield HEADER
    for line in lines:
        entry = format_entry(line)
        if entry is not None:
            yield entry
    yield FOOTER


def main(argv=None):
    """Read the mapping table and write the Rust source to stdout.

    ``argv`` is the list of command-line arguments without the program name;
    it defaults to ``sys.argv[1:]``.  Its first element, if any, is the path
    of the mapping table.
    """
    args = sys.argv[1:] if argv is None else argv
    path = args[0] if args else DEFAULT_TABLE_PATH
    # Write bytes so the output is UTF-8 whatever the console encoding is.
    # Python 3 exposes the byte stream as sys.stdout.buffer; where that
    # attribute is missing, sys.stdout itself is used.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    with io.open(path, encoding='utf-8') as table:
        for text in generate(table):
            out.write((text + '\n').encode('utf-8'))


if __name__ == '__main__':
    main()