Issue #1258 - Part 1: Import mailnews, ldap, and mork from comm-esr52.9.1

author: Matt A. Tobin <email@mattatobin.com> 2019-11-03 00:17:46 -0400
committer: Matt A. Tobin <email@mattatobin.com> 2019-11-03 00:17:46 -0400
commit: 302bf1b523012e11b60425d6eee1221ebc2724eb (patch)
tree: b191a895f8716efcbe42f454f37597a545a6f421 /mailnews/extensions/fts3/data/generate_table.py
parent: 21b3f6247403c06f85e1f45d219f87549862198f (diff)
download: UXP-302bf1b523012e11b60425d6eee1221ebc2724eb.tar
UXP-302bf1b523012e11b60425d6eee1221ebc2724eb.tar.gz
UXP-302bf1b523012e11b60425d6eee1221ebc2724eb.tar.lz
UXP-302bf1b523012e11b60425d6eee1221ebc2724eb.tar.xz
UXP-302bf1b523012e11b60425d6eee1221ebc2724eb.zip
1 files changed, 264 insertions, 0 deletions
diff --git a/mailnews/extensions/fts3/data/generate_table.py b/mailnews/extensions/fts3/data/generate_table.py
new file mode 100644
index 000000000..f6b012685
--- /dev/null
+++ b/mailnews/extensions/fts3/data/generate_table.py
@@ -0,0 +1,264 @@
+#!/usr/bin/python
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is Mozilla Thunderbird.
+#
+# The Initial Developer of the Original Code is Mozilla Japan.
+# Portions created by the Initial Developer are Copyright (C) 2010
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Makoto Kato <m_kato@ga2.so-net.ne.jp>
+#   Andrew Sutherland <asutherland@asutherland.org>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+import re
+
+def printTable(f, t):
+	i = f
+	while i <= t:
+		c = array[i]
+		print "0x%04x," % c,
+		i = i + 1
+		if not i % 8:
+			print "\n\t",
+
+print '''/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is Mozilla Japan.
+ * Portions created by the Initial Developer are Copyright (C) 2010
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Makoto Kato <m_kato@ga2.so-net.ne.jp>
+ *   Andrew Sutherland <asutherland@asutherland.org>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+/* THIS FILE IS GENERATED BY generate_table.py.  DON'T EDIT THIS */
+'''
+
+p = re.compile('([0-9A-F]{4,5})(?:\.\.([0-9A-F]{4,5}))?[=\>]([0-9A-F]{4,5})?')
+G_FROM = 1
+G_TO = 2
+G_FIRSTVAL = 3
+
+# Array whose value at index i is the unicode value unicode character i should
+# map to.
+array = []
+# Contents of gNormalizeTable.  We insert zero entries for sub-pages where we
+# have no mappings.  We insert references to the tables where we do have
+# such tables.
+globalTable = []
+globalTable.append("0")
+# The (exclusive) upper bound of the conversion table, unicode character-wise.
+# This is 0x10000 because our generated table is only 16-bit.  This also limits
+# the values we can map to; we perform an identity mapping for target values
+# that >= maxmapping.
+maxmapping = 0x10000
+sizePerTable = 64
+
+# Map characters that the mapping tells us to obliterate to the NUKE_CHAR
+# (such lines look like "FFF0..FFF8>")
+# We do this because if we didn't do this, we would emit these characters as
+# part of a token, which we definitely don't want.
+NUKE_CHAR = 0x20
+
+# --- load case folding table
+# entries in the file look like:
+#  0041>0061
+#  02D8>0020 0306
+#  2000..200A>0020
+#
+# The 0041 (uppercase A) tells us it lowercases to 0061 (lowercase a).
+# The 02D8 is a "spacing clone[s] of diacritic" breve which gets decomposed into
+#  a space character and a breve.  This entry/type of entry also shows up in
+#  'nfkc.txt'.
+# The 2000..200A covers a range of space characters and maps them down to the
+#  'normal' space character.
+
+file = open('nfkc_cf.txt')
+
+m = None
+line = "\n"
+i = 0x0
+while i < maxmapping and line:
+        if not m:
+                line = file.readline()
+                m = p.match(line)
+                if not m:
+                        continue
+                low = int(m.group(G_FROM), 16)
+                # if G_TO is present, use it, otherwise fallback to low
+                high = m.group(G_TO) and int(m.group(G_TO), 16) or low
+                # if G_FIRSTVAL is present use it, otherwise use NUKE_CHAR
+                val = (m.group(G_FIRSTVAL) and int(m.group(G_FIRSTVAL), 16)
+                                           or NUKE_CHAR)
+		continue
+
+
+        if i >= low and i <= high:
+		if val >= maxmapping:
+			array.append(i)
+		else:
+			array.append(val)
+                if i == high:
+                        m = None
+	else:
+		array.append(i)
+	i = i + 1
+file.close()
+
+# --- load normalization / decomposition table
+# It is important that this file gets processed second because the other table
+# will tell us about mappings from uppercase U with diaeresis to lowercase u
+# with diaeresis.  We obviously don't want that clobbering our value.  (Although
+# this would work out if we propagated backwards rather than forwards...)
+#
+# - entries in this file that we care about look like:
+#  00A0>0020
+#  0100=0041 0304
+#
+# They are found in the "Canonical and compatibility decomposition mappings"
+# section.
+#
+# The 00A0 is mapping NBSP to the normal space character.
+# The 0100 (a capital A with a bar over top of) is equivalent to 0041 (capital
+#  A) plus a 0304 (combining overline).  We do not care about the combining
+#  marks which is why our regular expression does not capture it.
+#
+#
+# - entries that we do not care about look like:
+#  0300..0314:230
+#
+# These map marks to their canonical combining class which appears to be a way
+# of specifying the precedence / order in which marks should be combined.  The
+# key thing is we don't care about them.
+file = open('nfkc.txt')
+line = file.readline()
+m = p.match(line)
+while line:
+	if not m:
+		line = file.readline()
+		m = p.match(line)
+		continue
+
+        low = int(m.group(G_FROM), 16)
+        # if G_TO is present, use it, otherwise fallback to low
+        high = m.group(G_TO) and int(m.group(G_TO), 16) or low
+        # if G_FIRSTVAL is present use it, otherwise fall back to NUKE_CHAR
+        val = m.group(G_FIRSTVAL) and int(m.group(G_FIRSTVAL), 16) or NUKE_CHAR
+        for i in range(low, high+1):
+                if i < maxmapping and val < maxmapping:
+                        array[i] = val
+	m = None
+file.close()
+
+# --- generate a normalized table to support case and accent folding
+
+i = 0
+needTerm = False;
+while i < maxmapping:
+	if not i % sizePerTable:
+		# table is empty?
+		j = i
+		while j < i + sizePerTable:
+			if array[j] != j:
+				break
+			j += 1
+
+		if j == i + sizePerTable:
+			if i:
+				globalTable.append("0")
+			i += sizePerTable
+			continue
+
+		if needTerm:
+			print "};\n"
+		globalTable.append("gNormalizeTable%04x" % i)
+		print "static const unsigned short gNormalizeTable%04x[] = {\n\t" % i,
+		print "/* U+%04x */\n\t" % i,
+		needTerm = True
+        # Decomposition does not case-fold, so we want to compensate by
+        # performing a lookup here.  Because decomposition chains can be
+        # example: 01d5, a capital U with a diaeresis and a bar. yes, really.
+        # 01d5 -> 00dc -> 0055 (U) -> 0075 (u)
+        c = array[i]
+        while c != array[c]:
+                c = array[c]
+        if c >= 0x41 and c <= 0x5a:
+                raise Exception('got an uppercase character somehow: %x => %x'
+                                % (i, c))
+	print "0x%04x," % c,
+	i = i + 1
+	if not i % 8:
+		print "\n\t",
+
+print "};\n\nstatic const unsigned short* gNormalizeTable[] = {",
+i = 0
+while i < (maxmapping / sizePerTable):
+	if not i % 4:
+		print "\n\t",
+	print globalTable[i] + ",", 
+	i += 1
+
+print '''
+};
+
+unsigned int normalize_character(const unsigned int c)
+{
+  if (c >= ''' + ('0x%x' % (maxmapping,)) + ''' || !gNormalizeTable[c >> 6])
+    return c;
+  return gNormalizeTable[c >> 6][c & 0x3f];
+}
+'''
author	Matt A. Tobin <email@mattatobin.com>	2019-11-03 00:17:46 -0400
committer	Matt A. Tobin <email@mattatobin.com>	2019-11-03 00:17:46 -0400
commit	302bf1b523012e11b60425d6eee1221ebc2724eb (patch)
tree	b191a895f8716efcbe42f454f37597a545a6f421 /mailnews/extensions/fts3/data/generate_table.py
parent	21b3f6247403c06f85e1f45d219f87549862198f (diff)
download	UXP-302bf1b523012e11b60425d6eee1221ebc2724eb.tar UXP-302bf1b523012e11b60425d6eee1221ebc2724eb.tar.gz UXP-302bf1b523012e11b60425d6eee1221ebc2724eb.tar.lz UXP-302bf1b523012e11b60425d6eee1221ebc2724eb.tar.xz UXP-302bf1b523012e11b60425d6eee1221ebc2724eb.zip