diff options
author | Matt A. Tobin <email@mattatobin.com> | 2020-11-09 20:37:05 -0500 |
---|---|---|
committer | Matt A. Tobin <email@mattatobin.com> | 2020-11-09 20:37:05 -0500 |
commit | 51468e998c8e7191ddecacec3944c806b29dd590 (patch) | |
tree | c713f075c54781868ec119ea5c5f3c9369af3576 /js/src/regexp/import-irregexp.py | |
parent | 77746f1d900a35eceb23bd760983e95de7b4a547 (diff) | |
download | UXP-51468e998c8e7191ddecacec3944c806b29dd590.tar UXP-51468e998c8e7191ddecacec3944c806b29dd590.tar.gz UXP-51468e998c8e7191ddecacec3944c806b29dd590.tar.lz UXP-51468e998c8e7191ddecacec3944c806b29dd590.tar.xz UXP-51468e998c8e7191ddecacec3944c806b29dd590.zip |
Issue #1677 - Part 5: "Simplify" regexp re-import process (and re-import from later revision)
I am going on record to say Mozilla are utter fucking assholes for pulling this as part of their progression.
Diffstat (limited to 'js/src/regexp/import-irregexp.py')
-rw-r--r-- | js/src/regexp/import-irregexp.py | 143 |
1 files changed, 143 insertions, 0 deletions
diff --git a/js/src/regexp/import-irregexp.py b/js/src/regexp/import-irregexp.py new file mode 100644 index 000000000..870387232 --- /dev/null +++ b/js/src/regexp/import-irregexp.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +# This script handles all the mechanical steps of importing irregexp from v8: +# +# 1. Acquire the source: either from github, or optionally from a local copy of v8. +# 2. Copy the contents of v8/src/regexp into js/src/regexp +# - Exclude files that we have chosen not to import. +# 3. While doing so, update #includes: +# - Change "src/regexp/*" to "regexp/*". +# - Remove other v8-specific headers completely. +# 4. Add '#include "regexp/regexp-shim.h"' in the necessary places. +# 5. Update the VERSION file to include the correct git hash. +# +# Usage: +# cd path/to/js/src/regexp +# ./import-irregexp.py --path path/to/v8/src/regexp +# +# Alternatively, without the --path argument, import-irregexp.py will +# clone v8 from github into a temporary directory. +# +# After running this script, changes to the shim code may be necessary +# to account for changes in upstream irregexp. 
import argparse
import os
import re
import subprocess
import sys
import tempfile
from pathlib import Path


# Header files that need to include the shim header.
_NEED_SHIM = frozenset([
    'property-sequences.h',
    'regexp-ast.h',
    'regexp-bytecode-peephole.h',
    'regexp-bytecodes.h',
    'regexp-dotprinter.h',
    'regexp.h',
    'regexp-macro-assembler.h',
    'regexp-stack.h',
    'special-case.h',
])

# Upstream files that are deliberately not imported.
_EXCLUDED = frozenset([
    'OWNERS',
    'regexp.cc',
    'regexp-utils.cc',
    'regexp-utils.h',
    'regexp-macro-assembler-arch.h',
])

# Includes of V8 regexp headers are rewritten to the local layout.
_REGEXP_INCLUDE = re.compile('#include "src/regexp')
_REGEXP_INCLUDE_NEW = '#include "regexp'

# Includes of any other V8 header are removed.
_OTHER_INCLUDE = re.compile('#include "src/')

# The line that gets inserted, and the (pre-rewrite) spelling it is
# compared against to find its alphabetical position among the
# upstream '#include "src/...' lines.
_SHIM_INCLUDE = '#include "regexp/regexp-shim.h"\n'
_SHIM_SORT_KEY = '#include "src/regexp/regexp-shim.h"'


def get_hash(path):
    """Return the hash of the git revision checked out at `path`.

    Raises subprocess.CalledProcessError if git fails (for example when
    `path` is not inside a git work tree).
    """
    command = ['git', 'rev-parse', 'HEAD']
    # Run git in `path` via cwd= instead of os.chdir(), so that the
    # working directory of this process is never changed (and therefore
    # never left changed if git fails).
    result = subprocess.check_output(command, cwd=str(path),
                                     encoding='utf-8')
    return result.rstrip()


def copy_and_update_includes(src_path, dst_path):
    """Copy `src_path` to `dst_path`, fixing up #include lines.

    1. '#include "src/regexp...' becomes '#include "regexp...'.
    2. Every other '#include "src/...' line is dropped.
    3. If the file name is listed in _NEED_SHIM, the shim header
       include is inserted.

    `src_path` must be a pathlib.Path (its `.name` is inspected);
    `dst_path` may be a Path or a string.
    """
    # Note: We get a little fancy to ensure that header files are in
    # alphabetic order. `need_to_add_shim` is true if we still have to
    # add the shim header in this file. `adding_shim_now` is true if we
    # have found a '#include "src/*' and we are just waiting to find
    # something alphabetically greater (or an empty line) so that we
    # can insert the shim header in the right place.
    need_to_add_shim = src_path.name in _NEED_SHIM
    adding_shim_now = False

    # Explicit encoding so the result does not depend on the locale;
    # `with` guarantees both files are flushed and closed.
    with open(str(src_path), 'r', encoding='utf-8') as src, \
            open(str(dst_path), 'w', encoding='utf-8') as dst:
        for line in src:
            if adding_shim_now:
                if line == '\n' or line > _SHIM_SORT_KEY:
                    dst.write(_SHIM_INCLUDE)
                    need_to_add_shim = False
                    adding_shim_now = False

            if _REGEXP_INCLUDE.search(line):
                dst.write(_REGEXP_INCLUDE.sub(_REGEXP_INCLUDE_NEW, line))
            elif _OTHER_INCLUDE.search(line):
                # The line itself is dropped; it only marks the start
                # of the include block for shim insertion.
                if need_to_add_shim:
                    adding_shim_now = True
            else:
                dst.write(line)

        # The file ended while we were still waiting for an insertion
        # point: emit the shim include rather than silently losing it.
        if adding_shim_now:
            dst.write(_SHIM_INCLUDE)


def import_from(srcdir, dstdir):
    """Import every non-excluded file of `srcdir` into `dstdir`.

    Subdirectories of `srcdir` are skipped. Afterwards the VERSION file
    in `dstdir` is rewritten to record the imported git revision. Both
    arguments must be pathlib.Path objects.
    """
    for entry in srcdir.iterdir():
        if entry.is_dir() or entry.name in _EXCLUDED:
            continue
        copy_and_update_includes(entry, dstdir / entry.name)

    # Update VERSION file
    revision = get_hash(srcdir)
    with open(str(dstdir / 'VERSION'), 'w', encoding='utf-8') as version_file:
        version_file.write('Imported using import-irregexp.py from:\n')
        version_file.write('https://github.com/v8/v8/tree/%s/src/regexp\n'
                           % revision)


def main():
    """Command-line entry point; returns the process exit status."""
    # This script should be run from js/src/regexp to work correctly.
    current_path = Path(os.getcwd())
    expected_path = 'js/src/regexp'
    if not current_path.match(expected_path):
        raise RuntimeError('%s must be run from %s' % (sys.argv[0],
                                                       expected_path))

    parser = argparse.ArgumentParser(description='Import irregexp from v8')
    parser.add_argument('-p', '--path', help='path to v8/src/regexp')
    args = parser.parse_args()

    if args.path:
        src_path = Path(args.path)

        # Cheap sanity check that --path really points at v8/src/regexp.
        if not (src_path / 'regexp.h').exists():
            print('Usage:\n import-irregexp.py --path <path/to/v8/src/regexp>')
            return 1
        import_from(src_path, current_path)
        return 0

    with tempfile.TemporaryDirectory() as tempdir:
        v8_git = 'https://github.com/v8/v8.git'
        # Argument list (no shell) and check_call, so that a failed
        # clone aborts here instead of surfacing later as a missing
        # source directory.
        subprocess.check_call(['git', 'clone', '--depth', '1',
                               v8_git, tempdir])
        src_path = Path(tempdir) / 'src/regexp'
        import_from(src_path, current_path)
    return 0


if __name__ == '__main__':
    sys.exit(main())