Add m-esr52 at 52.6.0

author: Matt A. Tobin <mattatobin@localhost.localdomain> 2018-02-02 04:16:08 -0500
committer: Matt A. Tobin <mattatobin@localhost.localdomain> 2018-02-02 04:16:08 -0500
commit: 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree: 10027f336435511475e392454359edea8e25895d /js/src/builtin/make_intl_data.py
parent: 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
download: UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
1 files changed, 992 insertions, 0 deletions
diff --git a/js/src/builtin/make_intl_data.py b/js/src/builtin/make_intl_data.py
new file mode 100755
index 000000000..b81d5951f
--- /dev/null
+++ b/js/src/builtin/make_intl_data.py
@@ -0,0 +1,992 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+""" Usage:
+    make_intl_data.py langtags [language-subtag-registry.txt]
+    make_intl_data.py tzdata
+
+    Target "langtags":
+    This script extracts information about mappings between deprecated and
+    current BCP 47 language tags from the IANA Language Subtag Registry and
+    converts it to JavaScript object definitions in IntlData.js. The definitions
+    are used in Intl.js.
+
+    The IANA Language Subtag Registry is imported from
+    https://www.iana.org/assignments/language-subtag-registry
+    and uses the syntax specified in
+    https://tools.ietf.org/html/rfc5646#section-3
+
+
+    Target "tzdata":
+    This script computes which time zone information is not up-to-date in ICU
+    and provides the necessary mappings to work around this problem.
+    https://ssl.icu-project.org/trac/ticket/12044
+"""
+
+from __future__ import print_function
+import os
+import re
+import io
+import codecs
+import sys
+import tarfile
+import tempfile
+import urllib2
+import urlparse
+from contextlib import closing
+from functools import partial
+from itertools import chain, ifilter, ifilterfalse, imap, tee
+from operator import attrgetter, itemgetter
+
+def readRegistryRecord(registry):
+    """ Yields the records of the IANA Language Subtag Registry as dictionaries.
+
+        `registry` is an iterable of text lines. A "%%" line ends the current
+        record; every "key: value" line becomes one dictionary entry, and a
+        line without ":" is appended to the value of the preceding key.
+        Blank lines are ignored. A trailing record that is not followed by
+        "%%" is yielded only when it is non-empty.
+    """
+    record = {}
+    for line in registry:
+        # Leading whitespace is removed here, so continuation lines can only
+        # be told apart from field lines by the absence of ":".
+        line = line.strip()
+        if line == "":
+            continue
+        if line == "%%":
+            yield record
+            record = {}
+        else:
+            if ":" in line:
+                key, value = line.split(":", 1)
+                key, value = key.strip(), value.strip()
+                record[key] = value
+            else:
+                # continuation line
+                # NOTE(review): a continuation line that itself contains ":"
+                # takes the branch above and is stored as a separate field.
+                record[key] += " " + line
+    if record:
+        yield record
+    return
+
+
+def readRegistry(registry):
+    """ Reads IANA Language Subtag Registry and extracts information for Intl.js.
+
+        Information extracted:
+        - langTagMappings: mappings from complete language tags to preferred
+          complete language tags
+        - langSubtagMappings: mappings from subtags to preferred subtags
+        - extlangMappings: mappings from extlang subtags to preferred subtags,
+          with prefix to be removed
+        Returns a dictionary with the keys "fileDate", "langTagMappings",
+        "langSubtagMappings", and "extlangMappings".
+
+        We also check that mappings for language subtags don't affect extlang
+        subtags and vice versa, so that CanonicalizeLanguageTag doesn't have
+        to separate them for processing. Region codes are separated by case,
+        and script codes by length, so they're unproblematic.
+
+        Raises an Exception when a subtag other than heploc has both a
+        preferred value and a prefix, or when language and extlang mappings
+        conflict with each other.
+
+        NOTE(review): fileDate is only bound when a record containing a
+        "File-Date" key is seen; without one the final return statement fails.
+    """
+    langTagMappings = {}
+    langSubtagMappings = {}
+    extlangMappings = {}
+    languageSubtags = set()
+    extlangSubtags = set()
+
+    for record in readRegistryRecord(registry):
+        # The file-date record is consumed here, before any "Type" lookup.
+        if "File-Date" in record:
+            fileDate = record["File-Date"]
+            continue
+
+        if record["Type"] == "grandfathered":
+            # Grandfathered tags don't use standard syntax, so
+            # CanonicalizeLanguageTag expects the mapping table to provide
+            # the final form for all.
+            # For langTagMappings, keys must be in lower case; values in
+            # the case used in the registry.
+            tag = record["Tag"]
+            if "Preferred-Value" in record:
+                langTagMappings[tag.lower()] = record["Preferred-Value"]
+            else:
+                langTagMappings[tag.lower()] = tag
+        elif record["Type"] == "redundant":
+            # For langTagMappings, keys must be in lower case; values in
+            # the case used in the registry.
+            if "Preferred-Value" in record:
+                langTagMappings[record["Tag"].lower()] = record["Preferred-Value"]
+        elif record["Type"] in ("language", "script", "region", "variant"):
+            # For langSubtagMappings, keys and values must be in the case used
+            # in the registry.
+            subtag = record["Subtag"]
+            if record["Type"] == "language":
+                languageSubtags.add(subtag)
+            if "Preferred-Value" in record:
+                if subtag == "heploc":
+                    # The entry for heploc is unique in its complexity; handle
+                    # it as special case below.
+                    continue
+                if "Prefix" in record:
+                    # This might indicate another heploc-like complex case.
+                    raise Exception("Please evaluate: subtag mapping with prefix value.")
+                langSubtagMappings[subtag] = record["Preferred-Value"]
+        elif record["Type"] == "extlang":
+            # For extlangMappings, keys must be in the case used in the
+            # registry; values are records with the preferred value and the
+            # prefix to be removed.
+            subtag = record["Subtag"]
+            extlangSubtags.add(subtag)
+            if "Preferred-Value" in record:
+                preferred = record["Preferred-Value"]
+                prefix = record["Prefix"]
+                extlangMappings[subtag] = {"preferred": preferred, "prefix": prefix}
+        else:
+            # No other types are allowed by
+            # https://tools.ietf.org/html/rfc5646#section-3.1.3
+            assert False, "Unrecognized Type: {0}".format(record["Type"])
+
+    # Check that mappings for language subtags and extlang subtags don't affect
+    # each other.
+    for lang in languageSubtags:
+        # An extlang that maps to the identically named language is accepted.
+        if lang in extlangMappings and extlangMappings[lang]["preferred"] != lang:
+            raise Exception("Conflict: lang with extlang mapping: " + lang)
+    for extlang in extlangSubtags:
+        if extlang in langSubtagMappings:
+            raise Exception("Conflict: extlang with lang mapping: " + extlang)
+
+    # Special case for heploc.
+    langTagMappings["ja-latn-hepburn-heploc"] = "ja-Latn-alalc97"
+
+    return {"fileDate": fileDate,
+            "langTagMappings": langTagMappings,
+            "langSubtagMappings": langSubtagMappings,
+            "extlangMappings": extlangMappings}
+
+
+def writeMappingsVar(intlData, dict, name, description, fileDate, url):
+    """ Writes a variable definition with a mapping table to file intlData.
+
+        Writes the contents of dictionary dict to file intlData with the given
+        variable name and a comment with description, fileDate, and URL.
+
+        Entries are written in sorted key order. A string value is emitted as
+        a quoted string; any other value must support lookup of "preferred"
+        and "prefix" and is emitted as an object literal with those two
+        properties.
+    """
+    intlData.write("\n")
+    intlData.write("// {0}.\n".format(description))
+    intlData.write("// Derived from IANA Language Subtag Registry, file date {0}.\n".format(fileDate))
+    intlData.write("// {0}\n".format(url))
+    # "{{" is the str.format escape for a literal "{".
+    intlData.write("var {0} = {{\n".format(name))
+    keys = sorted(dict)
+    for key in keys:
+        if isinstance(dict[key], basestring):
+            value = '"{0}"'.format(dict[key])
+        else:
+            preferred = dict[key]["preferred"]
+            prefix = dict[key]["prefix"]
+            value = '{{preferred: "{0}", prefix: "{1}"}}'.format(preferred, prefix)
+        intlData.write('    "{0}": {1},\n'.format(key, value))
+    intlData.write("};\n")
+
+
+def writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings):
+    """ Writes the language tag data to the Intl data file.
+
+        Emits three variable definitions to intlData, in this order:
+        langTagMappings, langSubtagMappings, and extlangMappings. fileDate
+        and url are repeated in the comment above each definition.
+    """
+    writeMappingsVar(intlData, langTagMappings, "langTagMappings",
+                     "Mappings from complete tags to preferred values", fileDate, url)
+    writeMappingsVar(intlData, langSubtagMappings, "langSubtagMappings",
+                     "Mappings from non-extlang subtags to preferred values", fileDate, url)
+    writeMappingsVar(intlData, extlangMappings, "extlangMappings",
+                     "Mappings from extlang subtags to preferred values", fileDate, url)
+
+def updateLangTags(args):
+    """ Update the IntlData.js file.
+
+        Reads the attributes `url`, `out`, and `file` from args. When
+        args.file is not None the registry is read from that local file;
+        otherwise it is downloaded from args.url and a copy is written to
+        "language-subtag-registry.txt" (a relative path). The generated
+        mappings are written to args.out.
+    """
+    url = args.url
+    out = args.out
+    filename = args.file
+
+    print("Arguments:")
+    print("\tDownload url: %s" % url)
+    print("\tLocal registry: %s" % filename)
+    print("\tOutput file: %s" % out)
+    print("")
+
+    if filename is not None:
+        print("Always make sure you have the newest language-subtag-registry.txt!")
+        registry = codecs.open(filename, "r", encoding="utf-8")
+    else:
+        print("Downloading IANA Language Subtag Registry...")
+        with closing(urllib2.urlopen(url)) as reader:
+            text = reader.read().decode("utf-8")
+        # Opened with "w+" so the downloaded text can be written and then
+        # read back through the same handle after rewinding.
+        registry = codecs.open("language-subtag-registry.txt", "w+", encoding="utf-8")
+        registry.write(text)
+        registry.seek(0)
+
+    print("Processing IANA Language Subtag Registry...")
+    # closing() closes the registry handle once parsing has finished.
+    with closing(registry) as reg:
+        data = readRegistry(reg)
+    fileDate = data["fileDate"]
+    langTagMappings = data["langTagMappings"]
+    langSubtagMappings = data["langSubtagMappings"]
+    extlangMappings = data["extlangMappings"]
+
+    print("Writing Intl data...")
+    with codecs.open(out, "w", encoding="utf-8") as intlData:
+        intlData.write("// Generated by make_intl_data.py. DO NOT EDIT.\n")
+        writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings)
+
+def flines(filepath, encoding="utf-8"):
+    """ Open filepath and iterate over its content.
+
+        Generator that opens the file in text mode with the given encoding
+        and yields its lines one at a time. The file is closed when the
+        generator is exhausted or closed.
+    """
+    with io.open(filepath, mode="r", encoding=encoding) as f:
+        for line in f:
+            yield line
+
+class Zone:
+    """ Time zone with optional file name.
+
+        Equality, ordering, and hashing are based on the name only; the
+        filename is carried along but never compared.
+    """
+
+    def __init__(self, name, filename=""):
+        self.name = name
+        self.filename = filename
+    def __eq__(self, other):
+        # Any object exposing an equal "name" attribute compares equal.
+        return hasattr(other, "name") and self.name == other.name
+    def __cmp__(self, other):
+        # Three-way comparison by name; other must have a "name" attribute.
+        if self.name == other.name:
+            return 0
+        if self.name < other.name:
+            return -1
+        return 1
+    def __hash__(self):
+        # Hash by name so that it stays consistent with __eq__.
+        return hash(self.name)
+    def __str__(self):
+        return self.name
+    def __repr__(self):
+        return self.name
+
+class TzDataDir:
+    """ tzdata source from a directory.
+
+        obj is the directory path. The instance exposes the attributes name,
+        resolve, basename, isfile, listdir, and readlines, the same attribute
+        names that TzDataFile provides.
+    """
+
+    def __init__(self, obj):
+        # Callable returning the last path component of the directory.
+        self.name = partial(os.path.basename, obj)
+        # Joins the given name(s) onto the directory path.
+        self.resolve = partial(os.path.join, obj)
+        self.basename = os.path.basename
+        self.isfile = os.path.isfile
+        # Callable listing the entries of the directory.
+        self.listdir = partial(os.listdir, obj)
+        # Generator function yielding the lines of a file path.
+        self.readlines = flines
+
+class TzDataFile:
+    """ tzdata source from a file (tar or gzipped).
+
+        obj must provide getmember, getnames, and extractfile; presumably it
+        is an open tarfile.TarFile -- TODO confirm against the caller. The
+        instance exposes the attributes name, resolve, basename, isfile,
+        listdir, and readlines, the same attribute names that TzDataDir
+        provides.
+    """
+
+    def __init__(self, obj):
+        # Strips up to two extensions from the base name.
+        # NOTE(review): this passes obj itself to os.path.basename, while the
+        # other attributes call archive methods on obj -- confirm which type
+        # callers pass before relying on name().
+        self.name = lambda: os.path.splitext(os.path.splitext(os.path.basename(obj))[0])[0]
+        # Maps a member name to the archive's member object.
+        self.resolve = obj.getmember
+        self.basename = attrgetter("name")
+        # Unbound method: called with a member object as its argument.
+        self.isfile = tarfile.TarInfo.isfile
+        self.listdir = obj.getnames
+        self.readlines = partial(self._tarlines, obj)
+
+    def _tarlines(self, tar, m):
+        """ Yield the lines of archive member m, read from the archive tar. """
+        with closing(tar.extractfile(m)) as f:
+            for line in codecs.EncodedFile(f, "utf-8"):
+                yield line
+
+def validateTimeZones(zones, links):
+    """ Validate the zone and link entries.
+
+        zones is a collection of Zone objects and links is a dictionary whose
+        values are compared against the zone names.
+
+        Raises RuntimeError when a link key is also present in zones, or when
+        a link target is not the name of any entry in zones.
+    """
+    linkZones = set(links.viewkeys())
+    intersect = linkZones.intersection(zones)
+    if intersect:
+        raise RuntimeError("Links also present in zones: %s" % intersect)
+
+    zoneNames = set(z.name for z in zones)
+    linkTargets = set(links.viewvalues())
+    if not linkTargets.issubset(zoneNames):
+        raise RuntimeError("Link targets not found: %s" % linkTargets.difference(zoneNames))
+
+def partition(iterable, *predicates):
+    """ Split iterable into groups by applying the predicates in order.
+
+        With no predicates, iterable itself is returned. Otherwise a tuple of
+        len(predicates) + 1 iterators is returned: the i-th iterator yields
+        the items accepted by predicate i that were rejected by all earlier
+        predicates, and the last iterator yields the items rejected by every
+        predicate. The iterators are lazy.
+    """
+    def innerPartition(pred, it):
+        # tee() allows the same input to feed both filtered views.
+        it1, it2 = tee(it)
+        return (ifilter(pred, it1), ifilterfalse(pred, it2))
+    if len(predicates) == 0:
+        return iterable
+    (left, right) = innerPartition(predicates[0], iterable)
+    if len(predicates) == 1:
+        return (left, right)
+    # Items rejected by the first predicate are split by the remaining ones.
+    return tuple([left] + list(partition(right, *predicates[1:])))
+
+def listIANAFiles(tzdataDir):
+    """ Return an iterator over the candidate time zone files in tzdataDir.
+
+        An entry of tzdataDir.listdir() is kept when its name consists only
+        of lower-case ASCII letters and digits and tzdataDir.isfile() accepts
+        the resolved entry.
+    """
+    def isTzFile(d, m, f):
+        return m(f) and d.isfile(d.resolve(f))
+    return ifilter(partial(isTzFile, tzdataDir, re.compile("^[a-z0-9]+$").match), tzdataDir.listdir())
+
+def readIANAFiles(tzdataDir, files):
+    """ Read all IANA time zone files from the given iterable.
+
+        Every file name in files is resolved and read through tzdataDir.
+        Returns the tuple (zones, links): zones is a set of Zone objects, one
+        per line starting with "Zone"; links is a dictionary mapping the Zone
+        of each line starting with "Link" to the name of its target. Each
+        Zone records the file name it was read from.
+
+        NOTE(review): a line that starts with "Zone" or "Link" but does not
+        match the corresponding pattern makes match() return None, so the
+        following group() call fails.
+    """
+    nameSyntax = "[\w/+\-]+"
+    pZone = re.compile(r"Zone\s+(?P<name>%s)\s+.*" % nameSyntax)
+    # In a Link line the target comes first, followed by the link name.
+    pLink = re.compile(r"Link\s+(?P<target>%s)\s+(?P<name>%s)(?:\s+#.*)?" % (nameSyntax, nameSyntax))
+
+    def createZone(line, fname):
+        match = pZone.match(line)
+        name = match.group("name")
+        return Zone(name, fname)
+
+    def createLink(line, fname):
+        match = pLink.match(line)
+        (name, target) = match.group("name", "target")
+        return (Zone(name, fname), target)
+
+    zones = set()
+    links = dict()
+    for filename in files:
+        filepath = tzdataDir.resolve(filename)
+        for line in tzdataDir.readlines(filepath):
+            if line.startswith("Zone"):
+                zones.add(createZone(line, filename))
+            if line.startswith("Link"):
+                (link, target) = createLink(line, filename)
+                links[link] = target
+
+    return (zones, links)
+
+def readIANATimeZones(tzdataDir, ignoreBackzone, ignoreFactory):
+    """ Read the IANA time zone information from `tzdataDir`.
+
+        Returns the tuple (zones, links). When ignoreFactory is true the
+        "Factory" zone is removed (it must be present). When ignoreBackzone
+        is false, the zones and links from the "backzone" file are merged in,
+        and links whose name is a backzone zone are dropped. The result is
+        checked with validateTimeZones before it is returned.
+    """
+
+    backzoneFiles = {"backzone"}
+    (bkfiles, tzfiles) = partition(listIANAFiles(tzdataDir), backzoneFiles.__contains__)
+
+    # Read zone and link infos.
+    (zones, links) = readIANAFiles(tzdataDir, tzfiles)
+    (backzones, backlinks) = readIANAFiles(tzdataDir, bkfiles)
+
+    # Remove the placeholder time zone "Factory".
+    if ignoreFactory:
+        zones.remove(Zone("Factory"))
+
+    # Merge with backzone data.
+    if not ignoreBackzone:
+        zones |= backzones
+        # A name that backzone defines as a zone must no longer be a link.
+        links = {name: target for name, target in links.iteritems() if name not in backzones}
+        links.update(backlinks)
+
+    validateTimeZones(zones, links)
+
+    return (zones, links)
+
+def readICUResourceFile(filename):
+    """ Read an ICU resource file.
+
+        Yields (<table-name>, <value>) for each table.
+
+        <table-name> is the "|"-joined path of the names of all currently
+        open tables. <value> is None when the table held no values, the
+        single value when it held exactly one, and a list otherwise. Values
+        are integers or strings with the surrounding quotes removed.
+
+        Raises RuntimeError for a line that matches none of the known forms.
+    """
+
+    numberValue = r"-?\d+"
+    stringValue = r'".+?"'
+    asVector = lambda val: r"%s(?:\s*,\s*%s)*" % (val, val)
+    numberVector = asVector(numberValue)
+    stringVector = asVector(stringValue)
+
+    reNumberVector = re.compile(numberVector)
+    reStringVector = re.compile(stringVector)
+    reNumberValue = re.compile(numberValue)
+    reStringValue = re.compile(stringValue)
+    def parseValue(value):
+        # Returns a list of ints or a list of unquoted strings.
+        m = reNumberVector.match(value)
+        if m:
+            return [int(v) for v in reNumberValue.findall(value)]
+        m = reStringVector.match(value)
+        if m:
+            return [v[1:-1] for v in reStringValue.findall(value)]
+        raise RuntimeError("unknown value type: %s" % value)
+
+    def extractValue(values):
+        # Collapse the collected values: none -> None, one -> that value.
+        if len(values) == 0:
+            return None
+        if len(values) == 1:
+            return values[0]
+        return values
+
+    def line(*args):
+        # Builds a regular expression for one complete line: the given parts
+        # separated by optional whitespace, with /* */ comments allowed at
+        # the start and end and a trailing // comment allowed at the end.
+        maybeMultiComments = r"(?:/\*[^*]*\*/)*"
+        maybeSingleComment = r"(?://.*)?"
+        lineStart = "^%s" % maybeMultiComments
+        lineEnd = "%s\s*%s$" % (maybeMultiComments, maybeSingleComment)
+        return re.compile(r"\s*".join(chain([lineStart], args, [lineEnd])))
+
+    tableName = r'(?P<quote>"?)(?P<name>.+?)(?P=quote)'
+    tableValue = r"(?P<value>%s|%s)" % (numberVector, stringVector)
+
+    reStartTable = line(tableName, r"\{")
+    reEndTable = line(r"\}")
+    reSingleValue = line(r",?", tableValue, r",?")
+    reCompactTable = line(tableName, r"\{", tableValue, r"\}")
+    reEmptyLine = line()
+
+    # Stack of the names of the tables that are currently open.
+    tables = []
+    currentTable = lambda: "|".join(tables)
+    values = []
+    # NOTE: the loop variable shadows the line() helper defined above; all
+    # patterns have already been built at this point.
+    for line in flines(filename, "utf-8-sig"):
+        line = line.strip()
+        if line == "":
+            continue
+
+        # Lines containing only comments carry no data.
+        m = reEmptyLine.match(line)
+        if m:
+            continue
+
+        m = reStartTable.match(line)
+        if m:
+            assert len(values) == 0
+            tables.append(m.group("name"))
+            continue
+
+        m = reEndTable.match(line)
+        if m:
+            # Every closing brace yields an entry, with None as the value
+            # when no values were collected since the last yield.
+            yield (currentTable(), extractValue(values))
+            tables.pop()
+            values = []
+            continue
+
+        # A table that is opened and closed on a single line.
+        m = reCompactTable.match(line)
+        if m:
+            assert len(values) == 0
+            tables.append(m.group("name"))
+            yield (currentTable(), extractValue(parseValue(m.group("value"))))
+            tables.pop()
+            continue
+
+        # Bare values are only accepted inside an open table.
+        m = reSingleValue.match(line)
+        if m and tables:
+            values.extend(parseValue(m.group("value")))
+            continue
+
+        raise RuntimeError("unknown entry: %s" % line)
+
+def readICUTimeZonesFromTimezoneTypes(icuTzDir):
+    """ Read the ICU time zone information from `icuTzDir`/timezoneTypes.txt
+        and returns the tuple (zones, links).
+
+        zones is a set of Zone objects taken from the typeMap|timezone
+        entries; links maps Zone objects taken from the typeAlias|timezone
+        entries to the value stored for them. "Etc/Unknown" is removed from
+        zones and "Canada/East-Saskatchewan" from links; both must be
+        present. The result is checked with validateTimeZones.
+    """
+    typeMapTimeZoneKey = "timezoneTypes:table(nofallback)|typeMap|timezone|"
+    typeAliasTimeZoneKey = "timezoneTypes:table(nofallback)|typeAlias|timezone|"
+    # Entry names use ":" where time zone names use "/".
+    toTimeZone = lambda name: Zone(name.replace(":", "/"))
+
+    zones = set()
+    links = dict()
+
+    for name, value in readICUResourceFile(os.path.join(icuTzDir, "timezoneTypes.txt")):
+        if name.startswith(typeMapTimeZoneKey):
+            zones.add(toTimeZone(name[len(typeMapTimeZoneKey):]))
+        if name.startswith(typeAliasTimeZoneKey):
+            links[toTimeZone(name[len(typeAliasTimeZoneKey):])] = value
+
+    # Remove the ICU placeholder time zone "Etc/Unknown".
+    zones.remove(Zone("Etc/Unknown"))
+
+    # tzdata2017c removed the link Canada/East-Saskatchewan -> America/Regina,
+    # but it is still present in ICU sources. Manually remove it to keep our
+    # tables consistent with IANA.
+    del links[Zone("Canada/East-Saskatchewan")]
+
+    validateTimeZones(zones, links)
+
+    return (zones, links)
+
+def readICUTimeZonesFromZoneInfo(icuTzDir, ignoreFactory):
+    """ Read the ICU time zone information from `icuTzDir`/zoneinfo64.txt
+        and returns the tuple (zones, links).
+
+        Entries of the Zones array are counted in file order; an ":int" entry
+        is a link whose value is the index of its target. Both indices are
+        translated to names through the Names table. "Etc/Unknown" is removed
+        from zones, "Canada/East-Saskatchewan" from links, and "Factory" from
+        zones when ignoreFactory is true; each must be present. The result is
+        checked with validateTimeZones.
+    """
+    zoneKey = "zoneinfo64:table(nofallback)|Zones:array|:table"
+    linkKey = "zoneinfo64:table(nofallback)|Zones:array|:int"
+    namesKey = "zoneinfo64:table(nofallback)|Names"
+
+    # Running index of the current entry within the Zones array.
+    tzId = 0
+    # Maps the index of a link entry to the index of its target.
+    tzLinks = dict()
+    tzNames = []
+
+    for name, value in readICUResourceFile(os.path.join(icuTzDir, "zoneinfo64.txt")):
+        if name == zoneKey:
+            tzId += 1
+        elif name == linkKey:
+            tzLinks[tzId] = int(value)
+            tzId += 1
+        elif name == namesKey:
+            tzNames.extend(value)
+
+    links = dict((Zone(tzNames[zone]), tzNames[target]) for (zone, target) in tzLinks.iteritems())
+    # Every name that is not a link is a zone.
+    zones = set([Zone(v) for v in tzNames if Zone(v) not in links])
+
+    # Remove the ICU placeholder time zone "Etc/Unknown".
+    zones.remove(Zone("Etc/Unknown"))
+
+    # tzdata2017c removed the link Canada/East-Saskatchewan -> America/Regina,
+    # but it is still present in ICU sources. Manually remove it to keep our
+    # tables consistent with IANA.
+    del links[Zone("Canada/East-Saskatchewan")]
+
+    # Remove the placeholder time zone "Factory".
+    if ignoreFactory:
+        zones.remove(Zone("Factory"))
+
+    validateTimeZones(zones, links)
+
+    return (zones, links)
+
+def readICUTimeZones(icuDir, icuTzDir, ignoreFactory):
+    """ Read the ICU time zone information and return the tuple (zones, links).
+
+        Combines zoneinfo64.txt and timezoneTypes.txt from `icuTzDir`, after
+        removing the legacy zones and links read from `icuDir`. Entries from
+        timezoneTypes.txt take precedence when the two sources disagree on
+        whether a name is a zone or a link.
+
+        Raises RuntimeError when timezoneTypes.txt names a zone or link that
+        is missing from the zoneinfo64 data.
+    """
+    # zoneinfo64.txt contains the supported time zones by ICU. This data is
+    # generated from tzdata files, it doesn't include "backzone" in stock ICU.
+    (zoneinfoZones, zoneinfoLinks) = readICUTimeZonesFromZoneInfo(icuTzDir, ignoreFactory)
+
+    # timezoneTypes.txt contains the canonicalization information for ICU. This
+    # data is generated from CLDR files. It includes data about time zones from
+    # tzdata's "backzone" file.
+    (typesZones, typesLinks) = readICUTimeZonesFromTimezoneTypes(icuTzDir)
+
+    # Information in zoneinfo64 should be a superset of timezoneTypes.
+    # NOTE: the lambda looks up zoneinfoZones and zoneinfoLinks when it is
+    # called, i.e. after the legacy entries have been removed below.
+    inZoneInfo64 = lambda zone: zone in zoneinfoZones or zone in zoneinfoLinks
+
+    # Remove legacy ICU time zones from zoneinfo64 data.
+    (legacyZones, legacyLinks) = readICULegacyZones(icuDir)
+    zoneinfoZones = set(zone for zone in zoneinfoZones if zone not in legacyZones)
+    zoneinfoLinks = dict((zone, target) for (zone, target) in zoneinfoLinks.iteritems() if zone not in legacyLinks)
+
+    notFoundInZoneInfo64 = [zone for zone in typesZones if not inZoneInfo64(zone)]
+    if notFoundInZoneInfo64:
+        raise RuntimeError("Missing time zones in zoneinfo64.txt: %s" % notFoundInZoneInfo64)
+
+    notFoundInZoneInfo64 = [zone for zone in typesLinks.iterkeys() if not inZoneInfo64(zone)]
+    if notFoundInZoneInfo64:
+        raise RuntimeError("Missing time zones in zoneinfo64.txt: %s" % notFoundInZoneInfo64)
+
+    # zoneinfo64.txt only defines the supported time zones by ICU, the canonicalization
+    # rules are defined through timezoneTypes.txt. Merge both to get the actual zones
+    # and links used by ICU.
+    icuZones = set(chain(
+                 (zone for zone in zoneinfoZones if zone not in typesLinks),
+                 (zone for zone in typesZones)
+               ))
+    icuLinks = dict(chain(
+                 ((zone, target) for (zone, target) in zoneinfoLinks.iteritems() if zone not in typesZones),
+                 ((zone, target) for (zone, target) in typesLinks.iteritems())
+               ))
+
+    return (icuZones, icuLinks)
+
+
+def readICULegacyZones(icuDir):
+    """ Read the ICU legacy time zones from `icuDir`/tools/tzcode/icuzones
+        and returns the tuple (zones, links).
+
+        "Etc/Unknown" is removed from zones (it must be present), and the
+        link Canada/East-Saskatchewan -> America/Regina is added to links.
+    """
+    tzdir = TzDataDir(os.path.join(icuDir, "tools/tzcode"))
+    (zones, links) = readIANAFiles(tzdir, ["icuzones"])
+
+    # Remove the ICU placeholder time zone "Etc/Unknown".
+    zones.remove(Zone("Etc/Unknown"))
+
+    # tzdata2017c removed the link Canada/East-Saskatchewan -> America/Regina,
+    # but it is still present in ICU sources. Manually tag it as a legacy time
+    # zone so our tables are kept consistent with IANA.
+    links[Zone("Canada/East-Saskatchewan")] = "America/Regina"
+
+    return (zones, links)
+
+def icuTzDataVersion(icuTzDir):
+    """ Read the ICU time zone version from `icuTzDir`/zoneinfo64.txt.
+
+        Returns the version taken from the first "// tz version: <version>"
+        comment line, where <version> is four digits followed by one
+        lower-case letter.
+
+        Raises RuntimeError when the file does not exist or contains no such
+        line.
+    """
+    def searchInFile(pattern, f):
+        # Returns the first capture group of the first matching line, or
+        # None when no line matches.
+        p = re.compile(pattern)
+        for line in flines(f, "utf-8-sig"):
+            m = p.search(line)
+            if m:
+                return m.group(1)
+        return None
+
+    zoneinfo = os.path.join(icuTzDir, "zoneinfo64.txt")
+    if not os.path.isfile(zoneinfo):
+        raise RuntimeError("file not found: %s" % zoneinfo)
+    version = searchInFile("^//\s+tz version:\s+([0-9]{4}[a-z])$", zoneinfo)
+    if version is None:
+        raise RuntimeError("%s does not contain a valid tzdata version string" % zoneinfo)
+    return version
+
+def findIncorrectICUZones(ianaZones, ianaLinks, icuZones, icuLinks, ignoreBackzone):
+    """ Find incorrect ICU zone entries.
+
+        Returns a list of (zone, icuTarget) tuples, sorted by zone, for every
+        IANA zone that ICU treats as a link. Entries for Etc/UTC, Etc/UCT,
+        and Etc/GMT are left out.
+
+        Raises RuntimeError when the IANA zones missing from ICU differ from
+        the expected ones, or when ICU has zones unknown to IANA.
+    """
+    isIANATimeZone = lambda zone: zone in ianaZones or zone in ianaLinks
+    isICUTimeZone = lambda zone: zone in icuZones or zone in icuLinks
+    isICULink = lambda zone: zone in icuLinks
+
+    # All IANA zones should be present in ICU.
+    missingTimeZones = [zone for zone in ianaZones if not isICUTimeZone(zone)]
+    # Normally zones in backzone are also present as links in one of the other
+    # time zone files. The only exception to this rule is the Asia/Hanoi time
+    # zone, this zone is only present in the backzone file.
+    expectedMissing = [] if ignoreBackzone else [Zone("Asia/Hanoi")]
+    if missingTimeZones != expectedMissing:
+        raise RuntimeError("Not all zones are present in ICU, did you forget "
+                           "to run intl/update-tzdata.sh? %s" % missingTimeZones)
+
+    # Zones which are only present in ICU?
+    additionalTimeZones = [zone for zone in icuZones if not isIANATimeZone(zone)]
+    if additionalTimeZones:
+        raise RuntimeError("Additional zones present in ICU, did you forget "
+                           "to run intl/update-tzdata.sh? %s" % additionalTimeZones)
+
+    # Zones which are marked as links in ICU.
+    result = ((zone, icuLinks[zone]) for zone in ianaZones if isICULink(zone))
+
+    # Remove unnecessary UTC mappings.
+    utcnames = ["Etc/UTC", "Etc/UCT", "Etc/GMT"]
+    result = ifilterfalse(lambda (zone, target): zone.name in utcnames, result)
+
+    return sorted(result, key=itemgetter(0))
+
+def findIncorrectICULinks(ianaZones, ianaLinks, icuZones, icuLinks):
+    """ Find incorrect ICU link entries.
+
+        Returns a list of (zone, ianaTarget, icuTarget) tuples, sorted by
+        zone, for every IANA link that ICU resolves to a different target or
+        treats as a zone; in the latter case icuTarget is the zone's own
+        name. Entries where both targets are one of Etc/UTC, Etc/UCT, and
+        Etc/GMT are left out.
+
+        Raises RuntimeError when an IANA link is missing from ICU, or when
+        ICU has links unknown to IANA.
+    """
+    isIANATimeZone = lambda zone: zone in ianaZones or zone in ianaLinks
+    isICUTimeZone = lambda zone: zone in icuZones or zone in icuLinks
+    isICULink = lambda zone: zone in icuLinks
+    isICUZone = lambda zone: zone in icuZones
+
+    # All links should be present in ICU.
+    missingTimeZones = [zone for zone in ianaLinks.iterkeys() if not isICUTimeZone(zone)]
+    if missingTimeZones:
+        raise RuntimeError("Not all zones are present in ICU, did you forget "
+                           "to run intl/update-tzdata.sh? %s" % missingTimeZones)
+
+    # Links which are only present in ICU?
+    additionalTimeZones = [zone for zone in icuLinks.iterkeys() if not isIANATimeZone(zone)]
+    if additionalTimeZones:
+        raise RuntimeError("Additional links present in ICU, did you forget "
+                           "to run intl/update-tzdata.sh? %s" % additionalTimeZones)
+
+    result = chain(
+        # IANA links which have a different target in ICU.
+        ((zone, target, icuLinks[zone]) for (zone, target) in ianaLinks.iteritems() if isICULink(zone) and target != icuLinks[zone]),
+
+        # IANA links which are zones in ICU.
+        ((zone, target, zone.name) for (zone, target) in ianaLinks.iteritems() if isICUZone(zone))
+    )
+
+    # Remove unnecessary UTC mappings.
+    utcnames = ["Etc/UTC", "Etc/UCT", "Etc/GMT"]
+    result = ifilterfalse(lambda (zone, target, icuTarget): target in utcnames and icuTarget in utcnames, result)
+
+    return sorted(result, key=itemgetter(0))
+
+# Comment lines written at the top of the generated tzdata output files; the
+# version comment is completed with str.format().
+generatedFileWarning = u"// Generated by make_intl_data.py. DO NOT EDIT."
+tzdataVersionComment = u"// tzdata version = {0}"
+
+def processTimeZones(tzdataDir, icuDir, icuTzDir, version, ignoreBackzone, ignoreFactory, out):
+    """ Read the time zone info and create a new time zone C++ header file.
+
+        Compares the IANA data from tzdataDir with the ICU data from icuDir
+        and icuTzDir and writes three tables to the file `out`: IANA zones
+        which ICU treats as links, IANA links which ICU canonicalizes
+        differently, and the legacy ICU time zone names. `version` is only
+        used for the version comment at the top of the file.
+    """
+    print("Processing tzdata mapping...")
+    (ianaZones, ianaLinks) = readIANATimeZones(tzdataDir, ignoreBackzone, ignoreFactory)
+    (icuZones, icuLinks) = readICUTimeZones(icuDir, icuTzDir, ignoreFactory)
+    (legacyZones, legacyLinks) = readICULegacyZones(icuDir)
+
+    incorrectZones = findIncorrectICUZones(ianaZones, ianaLinks, icuZones, icuLinks, ignoreBackzone)
+    if not incorrectZones:
+        print("<<< No incorrect ICU time zones found, please update Intl.js! >>>")
+        print("<<< Maybe https://ssl.icu-project.org/trac/ticket/12044 was fixed? >>>")
+
+    incorrectLinks = findIncorrectICULinks(ianaZones, ianaLinks, icuZones, icuLinks)
+    if not incorrectLinks:
+        print("<<< No incorrect ICU time zone links found, please update Intl.js! >>>")
+        print("<<< Maybe https://ssl.icu-project.org/trac/ticket/12044 was fixed? >>>")
+
+    print("Writing Intl tzdata file...")
+    # newline="" disables newline translation on output.
+    with io.open(out, mode="w", encoding="utf-8", newline="") as f:
+        println = partial(print, file=f)
+
+        println(generatedFileWarning)
+        println(tzdataVersionComment.format(version))
+        println(u"")
+
+        println(u"#ifndef builtin_IntlTimeZoneData_h")
+        println(u"#define builtin_IntlTimeZoneData_h")
+        println(u"")
+
+        println(u"namespace js {")
+        println(u"namespace timezone {")
+        println(u"")
+
+        println(u"// Format:")
+        println(u'// "ZoneName" // ICU-Name [time zone file]')
+        println(u"const char* const ianaZonesTreatedAsLinksByICU[] = {")
+        for (zone, icuZone) in incorrectZones:
+            println(u'    "%s", // %s [%s]' % (zone, icuZone, zone.filename))
+        println(u"};")
+        println(u"")
+
+        println(u"// Format:")
+        println(u'// "LinkName", "Target" // ICU-Target [time zone file]')
+        println(u"struct LinkAndTarget");
+        println(u"{");
+        println(u"    const char* const link;");
+        println(u"    const char* const target;");
+        println(u"};");
+        println(u"")
+        println(u"const LinkAndTarget ianaLinksCanonicalizedDifferentlyByICU[] = {")
+        for (zone, target, icuTarget) in incorrectLinks:
+            println(u'    { "%s", "%s" }, // %s [%s]' % (zone, target, icuTarget, zone.filename))
+        println(u"};")
+        println(u"")
+
+        println(u"// Legacy ICU time zones, these are not valid IANA time zone names. We also")
+        println(u"// disallow the old and deprecated System V time zones.")
+        println(u"// https://ssl.icu-project.org/repos/icu/trunk/icu4c/source/tools/tzcode/icuzones")
+        println(u"const char* const legacyICUTimeZones[] = {")
+        # Legacy links are listed first, then legacy zones, each group sorted.
+        for zone in chain(sorted(legacyLinks.keys()), sorted(legacyZones)):
+            println(u'    "%s",' % zone)
+        println(u"};")
+        println(u"")
+
+        println(u"} // namespace timezone")
+        println(u"} // namespace js")
+        println(u"")
+        println(u"#endif /* builtin_IntlTimeZoneData_h */")
+
+def updateBackzoneLinks(tzdataDir, links):
+    """ Return a copy of links adjusted for the "backzone" file of tzdataDir.
+
+        Links not mentioned in backzone are kept unchanged, links which
+        backzone redefines get the backzone target, and links which backzone
+        defines as zones are dropped from the result.
+    """
+    (backzoneZones, backzoneLinks) = readIANAFiles(tzdataDir, ["backzone"])
+    # updatedZones (links which became zones in backzone) is intentionally
+    # left out of the returned dictionary.
+    (stableZones, updatedLinks, updatedZones) = partition(
+        links.iteritems(),
+        # Link not changed in backzone.
+        lambda (zone, target): zone not in backzoneLinks and zone not in backzoneZones,
+        # Link has a new target.
+        lambda (zone, target): zone in backzoneLinks,
+    )
+    # Keep stable zones and links with updated target.
+    return dict(chain(
+                stableZones,
+                imap(lambda (zone, target): (zone, backzoneLinks[zone]), updatedLinks)
+           ))
+
+def generateTzDataLinkTestContent(testDir, version, fileName, description, links):
+    """ Write a JavaScript test file for time zone links to testDir/fileName.
+
+        links is an iterable of (zone, target) pairs; they are written,
+        sorted by zone, into a "links" object in the generated file.
+        description is written as a line just above that object and version
+        goes into the version comment at the top of the file.
+    """
+    # newline="" disables newline translation on output.
+    with io.open(os.path.join(testDir, fileName), mode="w", encoding="utf-8", newline="") as f:
+        println = partial(print, file=f)
+
+        println(u'// |reftest| skip-if(!this.hasOwnProperty("Intl"))')
+        println(u"")
+        println(generatedFileWarning)
+        println(tzdataVersionComment.format(version))
+        println(u"""
+const tzMapper = [
+    x => x,
+    x => x.toUpperCase(),
+    x => x.toLowerCase(),
+];
+""")
+
+        println(description)
+        println(u"const links = {")
+        for (zone, target) in sorted(links, key=itemgetter(0)):
+            println(u'    "%s": "%s",' % (zone, target))
+        println(u"};")
+
+        println(u"""
+for (let [linkName, target] of Object.entries(links)) {
+    if (target === "Etc/UTC" || target === "Etc/GMT")
+        target = "UTC";
+
+    for (let map of tzMapper) {
+        let dtf = new Intl.DateTimeFormat(undefined, {timeZone: map(linkName)});
+        let resolvedTimeZone = dtf.resolvedOptions().timeZone;
+        assertEq(resolvedTimeZone, target, `${linkName} -> ${target}`);
+    }
+}
+""")
+        println(u"""
+if (typeof reportCompare === "function")
+    reportCompare(0, 0, "ok");
+""")
+
+def generateTzDataTestBackwardLinks(tzdataDir, version, ignoreBackzone, testDir):
+    (zones, links) = readIANAFiles(tzdataDir, ["backward"])
+    assert len(zones) == 0
+
+    if not ignoreBackzone:
+        links = updateBackzoneLinks(tzdataDir, links)
+
+    generateTzDataLinkTestContent(
+        testDir, version,
+        "timeZone_backward_links.js",
+        u"// Link names derived from IANA Time Zone Database, backward file.",
+        links.iteritems()
+    )
+
+def generateTzDataTestNotBackwardLinks(tzdataDir, version, ignoreBackzone, testDir):
+    tzfiles = ifilterfalse({"backward", "backzone"}.__contains__, listIANAFiles(tzdataDir))
+    (zones, links) = readIANAFiles(tzdataDir, tzfiles)
+
+    if not ignoreBackzone:
+        links = updateBackzoneLinks(tzdataDir, links)
+
+    generateTzDataLinkTestContent(
+        testDir, version,
+        "timeZone_notbackward_links.js",
+        u"// Link names derived from IANA Time Zone Database, excluding backward file.",
+        links.iteritems()
+    )
+
+def generateTzDataTestBackzone(tzdataDir, version, ignoreBackzone, testDir):
+    backzoneFiles = {"backzone"}
+    (bkfiles, tzfiles) = partition(listIANAFiles(tzdataDir), backzoneFiles.__contains__)
+
+    # Read zone and link infos.
+    (zones, links) = readIANAFiles(tzdataDir, tzfiles)
+    (backzones, backlinks) = readIANAFiles(tzdataDir, bkfiles)
+
+    if not ignoreBackzone:
+        comment=u"""\
+// This file was generated with historical, pre-1970 backzone information
+// respected. Therefore, every zone key listed below is its own Zone, not
+// a Link to a modern-day target as IANA ignoring backzones would say.
+
+"""
+    else:
+        comment=u"""\
+// This file was generated while ignoring historical, pre-1970 backzone
+// information. Therefore, every zone key listed below is part of a Link
+// whose target is the corresponding value.
+
+"""
+
+    generateTzDataLinkTestContent(
+        testDir, version,
+        "timeZone_backzone.js",
+        comment + u"// Backzone zones derived from IANA Time Zone Database.",
+        ((zone, zone if not ignoreBackzone else links[zone]) for zone in backzones if zone in links)
+    )
+
+def generateTzDataTestBackzoneLinks(tzdataDir, version, ignoreBackzone, testDir):
+    backzoneFiles = {"backzone"}
+    (bkfiles, tzfiles) = partition(listIANAFiles(tzdataDir), backzoneFiles.__contains__)
+
+    # Read zone and link infos.
+    (zones, links) = readIANAFiles(tzdataDir, tzfiles)
+    (backzones, backlinks) = readIANAFiles(tzdataDir, bkfiles)
+
+    if not ignoreBackzone:
+        comment=u"""\
+// This file was generated with historical, pre-1970 backzone information
+// respected. Therefore, every zone key listed below points to a target
+// in the backzone file and not to its modern-day target as IANA ignoring
+// backzones would say.
+
+"""
+    else:
+        comment=u"""\
+// This file was generated while ignoring historical, pre-1970 backzone
+// information. Therefore, every zone key listed below is part of a Link
+// whose target is the corresponding value ignoring any backzone entries.
+
+"""
+
+    generateTzDataLinkTestContent(
+        testDir, version,
+        "timeZone_backzone_links.js",
+        comment +  u"// Backzone links derived from IANA Time Zone Database.",
+        ((zone, target if not ignoreBackzone else links[zone]) for (zone, target) in backlinks.iteritems())
+    )
+
+def generateTzDataTests(tzdataDir, version, ignoreBackzone, testDir):
+    generateTzDataTestBackwardLinks(tzdataDir, version, ignoreBackzone, testDir)
+    generateTzDataTestNotBackwardLinks(tzdataDir, version, ignoreBackzone, testDir)
+    generateTzDataTestBackzone(tzdataDir, version, ignoreBackzone, testDir)
+    generateTzDataTestBackzoneLinks(tzdataDir, version, ignoreBackzone, testDir)
+
+def updateTzdata(args):
+    """ Update the time zone cpp file.
+
+    `args` is the parsed command line. This function reads `args.tz` (a local
+    tzdata directory or tar file, or None to download the release whose
+    version icuTzDataVersion() reports), `args.ignore_backzone` and
+    `args.out`.
+
+    The tzdata input is handed to processTimeZones() together with `args.out`
+    and to generateTzDataTests() together with the DateTimeFormat test
+    directory.
+
+    Raises RuntimeError when the script is not located in js/src/builtin,
+    when one of the expected source directories is missing, or when the
+    tzdata input is neither a directory nor a tar file.
+    """
+
+    # This script must reside in js/src/builtin to work correctly.
+    (thisDir, thisFile) = os.path.split(os.path.abspath(sys.argv[0]))
+    thisDir = os.path.normpath(thisDir)
+    if "/".join(thisDir.split(os.sep)[-3:]) != "js/src/builtin":
+        raise RuntimeError("%s must reside in js/src/builtin" % sys.argv[0])
+    # The source root is three directory levels above this script.
+    topsrcdir = "/".join(thisDir.split(os.sep)[:-3])
+
+    # All directories used below must already exist in the source tree.
+    icuDir = os.path.join(topsrcdir, "intl/icu/source")
+    if not os.path.isdir(icuDir):
+        raise RuntimeError("not a directory: %s" % icuDir)
+
+    icuTzDir = os.path.join(topsrcdir, "intl/tzdata/source")
+    if not os.path.isdir(icuTzDir):
+        raise RuntimeError("not a directory: %s" % icuTzDir)
+
+    dateTimeFormatTestDir = os.path.join(topsrcdir, "js/src/tests/Intl/DateTimeFormat")
+    if not os.path.isdir(dateTimeFormatTestDir):
+        raise RuntimeError("not a directory: %s" % dateTimeFormatTestDir)
+
+    # A tzdata location given on the command line must exist; None means
+    # "download it" (see the end of this function).
+    tzDir = args.tz
+    if tzDir is not None and not (os.path.isdir(tzDir) or os.path.isfile(tzDir)):
+        raise RuntimeError("not a directory or file: %s" % tzDir)
+    ignoreBackzone = args.ignore_backzone
+    # TODO: Accept or ignore the placeholder time zone "Factory"?
+    ignoreFactory = False
+    out = args.out
+
+    # The download URL is derived from the version found in icuTzDir.
+    version = icuTzDataVersion(icuTzDir)
+    url = "https://www.iana.org/time-zones/repository/releases/tzdata%s.tar.gz" % version
+
+    # Echo the effective configuration before doing any work.
+    print("Arguments:")
+    print("\ttzdata version: %s" % version)
+    print("\ttzdata URL: %s" % url)
+    print("\ttzdata directory|file: %s" % tzDir)
+    print("\tICU directory: %s" % icuDir)
+    print("\tICU timezone directory: %s" % icuTzDir)
+    print("\tIgnore backzone file: %s" % ignoreBackzone)
+    print("\tOutput file: %s" % out)
+    print("")
+
+    def updateFrom(f):
+        # `f` is a path to either a tar archive or a directory; anything else
+        # is rejected. Both steps get their own TzDataFile/TzDataDir wrapper.
+        if os.path.isfile(f) and tarfile.is_tarfile(f):
+            with tarfile.open(f, "r:*") as tar:
+                processTimeZones(TzDataFile(tar), icuDir, icuTzDir, version, ignoreBackzone, ignoreFactory, out)
+                generateTzDataTests(TzDataFile(tar), version, ignoreBackzone, dateTimeFormatTestDir)
+        elif os.path.isdir(f):
+            processTimeZones(TzDataDir(f), icuDir, icuTzDir, version, ignoreBackzone, ignoreFactory, out)
+            generateTzDataTests(TzDataDir(f), version, ignoreBackzone, dateTimeFormatTestDir)
+        else:
+            raise RuntimeError("unknown format")
+
+    if tzDir is None:
+        print("Downloading tzdata file...")
+        with closing(urllib2.urlopen(url)) as tzfile:
+            # Use the last path component of the final URL as the file suffix.
+            fname = urlparse.urlsplit(tzfile.geturl()).path.split("/")[-1]
+            with tempfile.NamedTemporaryFile(suffix=fname) as tztmpfile:
+                print("File stored in %s" % tztmpfile.name)
+                tztmpfile.write(tzfile.read())
+                # Flush so the data is on disk before the file is re-opened
+                # by name; processing must finish inside this "with" block.
+                tztmpfile.flush()
+                updateFrom(tztmpfile.name)
+    else:
+        updateFrom(tzDir)
+
+if __name__ == "__main__":
+    import argparse
+
+    def EnsureHttps(v):
+        if not v.startswith("https:"):
+            raise argparse.ArgumentTypeError("URL protocol must be https: " % v)
+        return v
+
+    parser = argparse.ArgumentParser(description="Update intl data.")
+    subparsers = parser.add_subparsers(help="Select update mode")
+
+    parser_tags = subparsers.add_parser("langtags",
+                                        help="Update language-subtag-registry")
+    parser_tags.add_argument("--url",
+                             metavar="URL",
+                             default="https://www.iana.org/assignments/language-subtag-registry",
+                             type=EnsureHttps,
+                             help="Download url for language-subtag-registry.txt (default: %(default)s)")
+    parser_tags.add_argument("--out",
+                             default="IntlData.js",
+                             help="Output file (default: %(default)s)")
+    parser_tags.add_argument("file",
+                             nargs="?",
+                             help="Local language-subtag-registry.txt file, if omitted uses <URL>")
+    parser_tags.set_defaults(func=updateLangTags)
+
+    parser_tz = subparsers.add_parser("tzdata", help="Update tzdata")
+    parser_tz.add_argument("--tz",
+                           help="Local tzdata directory or file, if omitted downloads tzdata "
+                                "distribution from https://www.iana.org/time-zones/")
+    # ICU doesn't include the backzone file by default, but we still like to
+    # use the backzone time zone names to avoid user confusion. This does lead
+    # to formatting "historic" dates (pre-1970 era) with the wrong time zone,
+    # but that's probably acceptable for now.
+    parser_tz.add_argument("--ignore-backzone",
+                           action="store_true",
+                           help="Ignore tzdata's 'backzone' file. Can be enabled to generate more "
+                                "accurate time zone canonicalization reflecting the actual time "
+                                "zones as used by ICU.")
+    parser_tz.add_argument("--out",
+                           default="IntlTimeZoneData.h",
+                           help="Output file (default: %(default)s)")
+    parser_tz.set_defaults(func=updateTzdata)
+
+    args = parser.parse_args()
+    args.func(args)
author	Matt A. Tobin <mattatobin@localhost.localdomain>	2018-02-02 04:16:08 -0500
committer	Matt A. Tobin <mattatobin@localhost.localdomain>	2018-02-02 04:16:08 -0500
commit	5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree	10027f336435511475e392454359edea8e25895d /js/src/builtin/make_intl_data.py
parent	49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
download	UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip