summaryrefslogtreecommitdiffstats
path: root/js/src/regexp/import-irregexp.py
diff options
context:
space:
mode:
author Matt A. Tobin <email@mattatobin.com> 2020-11-09 20:37:05 -0500
committer Matt A. Tobin <email@mattatobin.com> 2020-11-09 20:37:05 -0500
commit 51468e998c8e7191ddecacec3944c806b29dd590 (patch)
tree c713f075c54781868ec119ea5c5f3c9369af3576 /js/src/regexp/import-irregexp.py
parent 77746f1d900a35eceb23bd760983e95de7b4a547 (diff)
download UXP-51468e998c8e7191ddecacec3944c806b29dd590.tar
UXP-51468e998c8e7191ddecacec3944c806b29dd590.tar.gz
UXP-51468e998c8e7191ddecacec3944c806b29dd590.tar.lz
UXP-51468e998c8e7191ddecacec3944c806b29dd590.tar.xz
UXP-51468e998c8e7191ddecacec3944c806b29dd590.zip
Issue #1677 - Part 5: "Simplify" regexp re-import process (and re-import from later revision)
I am going on record to say Mozilla are utter fucking assholes for pulling this as part of their progression.
Diffstat (limited to 'js/src/regexp/import-irregexp.py')
-rw-r--r-- js/src/regexp/import-irregexp.py 143
1 files changed, 143 insertions, 0 deletions
diff --git a/js/src/regexp/import-irregexp.py b/js/src/regexp/import-irregexp.py
new file mode 100644
index 000000000..870387232
--- /dev/null
+++ b/js/src/regexp/import-irregexp.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This script handles all the mechanical steps of importing irregexp from v8:
+#
+# 1. Acquire the source: either from github, or optionally from a local copy of v8.
+# 2. Copy the contents of v8/src/regexp into js/src/regexp
+# - Exclude files that we have chosen not to import.
+# 3. While doing so, update #includes:
+# - Change "src/regexp/*" to "regexp/*".
+# - Remove other v8-specific headers completely.
+# 4. Add '#include "regexp/regexp-shim.h"' in the necessary places.
+# 5. Update the VERSION file to include the correct git hash.
+#
+# Usage:
+# cd path/to/js/src/regexp
+# ./import-irregexp.py --path path/to/v8/src/regexp
+#
+# Alternatively, without the --path argument, import-irregexp.py will
+# clone v8 from github into a temporary directory.
+#
+# After running this script, changes to the shim code may be necessary
+# to account for changes in upstream irregexp.
+
+import os
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+
def get_hash(path):
    """Return the hash of the git revision checked out at *path*.

    Runs ``git rev-parse HEAD`` with *path* as the working directory and
    returns its output with trailing whitespace removed.

    Any exception raised by ``subprocess.check_output`` propagates to the
    caller, e.g. ``subprocess.CalledProcessError`` when git exits with a
    non-zero status.
    """
    # Hand the directory to git via `cwd` rather than os.chdir(), so this
    # process's working directory is never changed -- not even when git
    # fails and an exception is raised.
    command = ['git', 'rev-parse', 'HEAD']
    result = subprocess.check_output(command, cwd=str(path),
                                     encoding='utf-8')
    return result.rstrip()
+
+
def copy_and_update_includes(src_path, dst_path):
    """Copy *src_path* to *dst_path*, rewriting its ``#include`` lines.

    Line by line:
      * ``#include "src/regexp...`` becomes ``#include "regexp...``.
      * Any other ``#include "src/...`` line is dropped.
      * Every other line is copied unchanged.

    If the file name of *src_path* is in the ``need_shim`` list, one
    ``#include "regexp/regexp-shim.h"`` line is also inserted (see the
    comment on ``need_to_add_shim`` below for where).

    src_path: path object of the file to read; its ``.name`` attribute is
              used to decide whether the shim header is needed.
    dst_path: path of the file to write (created or overwritten).
    """
    # Header files that need to include the shim header.
    need_shim = ('property-sequences.h',
                 'regexp-ast.h',
                 'regexp-bytecode-peephole.h',
                 'regexp-bytecodes.h',
                 'regexp-dotprinter.h',
                 'regexp.h',
                 'regexp-macro-assembler.h',
                 'regexp-stack.h',
                 'special-case.h')

    # 1. Rewrite includes of V8 regexp headers:
    regexp_include = re.compile(r'#include "src/regexp')
    regexp_include_new = '#include "regexp'

    # 2. Remove includes of other V8 headers
    other_include = re.compile(r'#include "src/')

    # 3. If needed, add '#include "regexp/regexp-shim.h"'.
    #    `need_to_add_shim` is true while the shim header still has to be
    #    added to this file. `adding_shim_now` becomes true once a
    #    non-regexp '#include "src/*' line has been dropped; the shim is
    #    then written just before the first following line that is either
    #    empty or compares greater than `shim_sort_key`, which keeps the
    #    include list ordered.
    shim_include = '#include "regexp/regexp-shim.h"\n'
    shim_sort_key = '#include "src/regexp/regexp-shim.h"'
    need_to_add_shim = src_path.name in need_shim
    adding_shim_now = False

    # Context managers make sure both files are flushed and closed, even
    # if an error interrupts the copy.
    with open(str(src_path), 'r') as src, open(str(dst_path), 'w') as dst:
        for line in src:
            if adding_shim_now and (line == '\n' or line > shim_sort_key):
                dst.write(shim_include)
                need_to_add_shim = False
                adding_shim_now = False

            if regexp_include.search(line):
                dst.write(regexp_include.sub(regexp_include_new, line))
            elif other_include.search(line):
                if need_to_add_shim:
                    adding_shim_now = True
            else:
                dst.write(line)

        # The input ended while we were still waiting for an insertion
        # point (e.g. the last line was a dropped include): emit the shim
        # now instead of silently losing it.
        if adding_shim_now:
            dst.write(shim_include)
+
+
def import_from(srcdir, dstdir):
    """Import every non-excluded file of *srcdir* into *dstdir*.

    Each regular entry of *srcdir* whose name is not in the exclusion list
    is copied to *dstdir* under the same name via
    copy_and_update_includes(); sub-directories are skipped.  Afterwards
    ``dstdir/VERSION`` is rewritten to record the git revision of *srcdir*
    as reported by get_hash().

    srcdir, dstdir: path objects (they are iterated with ``iterdir()`` and
                    joined with ``/``).
    """
    # Files that are deliberately not imported.
    excluded = ('OWNERS',
                'regexp.cc',
                'regexp-utils.cc',
                'regexp-utils.h',
                'regexp-macro-assembler-arch.h')

    for entry in srcdir.iterdir():
        if entry.is_dir():
            continue
        if str(entry.name) in excluded:
            continue
        copy_and_update_includes(entry, dstdir / entry.name)

    # Update VERSION file.  The revision is looked up before the file is
    # opened so that a git failure does not leave a truncated VERSION.
    revision = get_hash(srcdir)
    with open(str(dstdir / 'VERSION'), 'w') as version_file:
        version_file.write('Imported using import-irregexp.py from:\n')
        version_file.write('https://github.com/v8/v8/tree/%s/src/regexp\n'
                           % revision)
+
+
def main():
    """Command-line entry point: import irregexp into the current directory.

    The current directory must end in ``js/src/regexp``.  With ``--path``
    the files are imported from that local copy of v8/src/regexp;
    otherwise v8 is shallow-cloned from github into a temporary directory
    and imported from there.
    """
    import argparse
    import tempfile

    # This script should be run from js/src/regexp to work correctly.
    current_path = Path(os.getcwd())
    expected_path = 'js/src/regexp'
    if not current_path.match(expected_path):
        raise RuntimeError('%s must be run from %s' % (sys.argv[0],
                                                       expected_path))

    parser = argparse.ArgumentParser(description='Import irregexp from v8')
    parser.add_argument('-p', '--path', help='path to v8/src/regexp')
    args = parser.parse_args()

    if args.path:
        src_path = Path(args.path)

        # A directory without regexp.h is not v8/src/regexp: show usage.
        if not (src_path / 'regexp.h').exists():
            print('Usage:\n import-irregexp.py --path <path/to/v8/src/regexp>')
            sys.exit(1)
        import_from(src_path, current_path)
        sys.exit(0)

    with tempfile.TemporaryDirectory() as tempdir:
        v8_git = 'https://github.com/v8/v8.git'
        # Argument-list form: no shell is involved, so the temporary path
        # needs no quoting, and check_call raises CalledProcessError if
        # the clone fails instead of importing from a missing directory.
        subprocess.check_call(['git', 'clone', '--depth', '1',
                               v8_git, tempdir])
        src_path = Path(tempdir) / 'src/regexp'
        import_from(src_path, current_path)


if __name__ == '__main__':
    main()