diff options
Diffstat (limited to 'intl/icu/source/tools/icu-svnprops-check.py')
-rwxr-xr-x | intl/icu/source/tools/icu-svnprops-check.py | 246 |
1 files changed, 246 insertions, 0 deletions
#! /usr/bin/python

# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

# Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
# All rights reserved.

#
# Script to check and fix svn property settings for ICU source files.
# Also check for the correct line endings on files with svn:eol-style = native
#
# THIS SCRIPT DOES NOT WORK ON WINDOWS
# It only works correctly on platforms where the native line ending is a plain \n
#
# usage:
#    icu-svnprops-check.py [options]
#
# options:
#    -f | --fix    Fix any problems that are found
#    -h | --help   Print a usage line and exit.
#
# The tool operates recursively on the directory from which it is run.
# Only files from the svn repository are checked.
# No changes are made to the repository; only the working copy will be altered.
#
# The script is written to run unchanged under both Python 2 (2.6+) and
# Python 3: every print uses the single-argument call form, and file
# contents are inspected as bytes.

import sys
import os
import os.path
import re
import getopt

try:
    # Python 3.3+
    from shlex import quote as _shell_quote
except ImportError:
    # Python 2
    from pipes import quote as _shell_quote

#
# svn autoprops definitions.
# Copy and paste here the ICU recommended auto-props from
# http://icu-project.org/docs/subversion_howto/index.html
#
# This program will parse this autoprops string, and verify that files in
# the repository have the recommended properties set.
#
# NOTE(review): the *.rtf and *.pdf entries below name the property
# "mime-type" rather than "svn:mime-type". That looks like a typo in the
# upstream recommendation; it is kept verbatim here so that this tool keeps
# agreeing with the published auto-props. Confirm before changing.
#
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
### file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?'). All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""


# file_types: The parsed form of the svn auto-props specification.
#     A list of file types - .cc, .cpp, .txt, etc.
#     Each element is a (type, proplist) pair.
#     "type" is a regular expression string that will match a file name.
#     proplist is another list, one element per property.
#     Each property item is a two element tuple, (prop name, prop value).
file_types = list()


def parse_auto_props():
    """Parse svn_auto_props into the module-level file_types list.

    Comment lines, blank lines and the [auto-props] section header are
    skipped. Lines that do not look like "<pattern> = ..." are reported on
    stdout and skipped. Each remaining line contributes one
    (regex string, [(prop name, prop value), ...]) entry.

    file_types is emptied first, so calling this function more than once
    does not produce duplicate entries.
    """
    del file_types[:]
    for propline in svn_auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):         # comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):  # the [auto-props] line
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):  # minimal check for "<file-type> ="
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # Transform the file type expression from autoprops into a normal
        # regular expression, e.g. "*.cpp" ==> ".*\.cpp$".
        # The literal dots must be escaped before the wildcards are expanded,
        # because the expansions themselves introduce regex dots.
        file_type = file_type.strip()
        file_type = file_type.replace(".", r"\.")
        file_type = file_type.replace("*", ".*")
        file_type = file_type.replace("?", ".")
        file_type = file_type + "$"

        # Example string_proplist at this point:
        #     " svn:eol-style=native;svn:executable"
        # Split on ';' into a list of properties. The negative lookahead and
        # lookbehind in the split regexp prevent matching on ';;', which is
        # an escaped ';' within a property value.
        proplist = list()
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            if "=" in prop:
                prop_name, prop_val = prop.split("=", 1)
            else:
                # properties with no explicit value, e.g. svn:executable
                prop_name, prop_val = prop, ""
            # Unescape any ";;" in a property value, e.g. the mime-type from
            #   *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name.strip(), prop_val))

        file_types.append((file_type, proplist))


def runCommand(cmd):
    """Run a shell command and return everything it wrote to stdout.

    cmd: the complete command line, passed to the shell through os.popen.
    return: the command's standard output as a string.

    If the command fails, a message is written to stderr and the script
    exits with the command's own exit code (or 1 if the command was killed
    by a signal).
    """
    output_file = os.popen(cmd)
    try:
        output_text = output_file.read()
    finally:
        exit_status = output_file.close()
    if exit_status:
        sys.stderr.write('" %s " failed. Exiting.\n' % cmd)
        # close() returns a wait()-style status with the exit code in the
        # high byte. Passing that straight to sys.exit() would truncate a
        # status such as 256 (exit code 1) to 0 and report success.
        exit_code = (exit_status >> 8) & 0xff
        sys.exit(exit_code or 1)
    return output_text


def usage():
    """Print a one-line usage summary to stdout."""
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")


#
# UTF-8 file check. For text files, add a charset to the mime-type if their contents are UTF-8
#    file_name:        name of a text file.
#    base_mime_type:   svn:mime-type property value from the auto-props file (no charset= part)
#    actual_mime_type: existing svn:mime-type property value for the file.
#    return:           svn:mime-type property value, with charset added when appropriate.
#
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Return the svn:mime-type value that file_name should have.

    An existing value that already names a charset is returned unchanged.
    Pure ASCII files, and files that are neither ASCII nor valid UTF-8 (a
    warning is printed for those), get base_mime_type. Valid non-ASCII
    UTF-8 files get base_mime_type + ';charset=utf-8'; a notice is printed
    if such a file does not start with a UTF-8 byte order mark.
    """
    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    # Read raw bytes: the check below is about the encoding itself, so the
    # data must not be decoded (or newline-translated) on the way in.
    f = open(file_name, 'rb')
    try:
        data = f.read()
    finally:
        f.close()

    # bytearray yields integers under both Python 2 and Python 3.
    if all(byte < 128 for byte in bytearray(data)):
        # pure ASCII (this includes the empty file).
        return base_mime_type

    try:
        data.decode("UTF-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    # Compare against the whole three-byte BOM; 0xEF alone is also the lead
    # byte of every character in the range U+F000..U+FFFF.
    if not data.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with no BOM: " + file_name)

    # Append charset=utf-8.
    return base_mime_type + ';charset=utf-8'


def _check_file(f, fix_problems):
    """Check, and optionally fix, the svn properties and line endings of one file.

    f: path of a working-copy file, as listed by "svn ls -R".
    fix_problems: when true, run the commands that repair what is found;
                  otherwise only report.
    """
    # File names go through the shell, so quote them to survive spaces and
    # shell metacharacters. The printed messages keep the plain name.
    quoted_f = _shell_quote(f)

    # Auto-props patterns describe file names, not paths. Matching on the
    # base name lets patterns without a wildcard ("configure", "Makefile")
    # apply to files in subdirectories too.
    base_name = os.path.basename(f)

    for file_pattern, props in file_types:
        if not re.match(file_pattern, base_name):
            continue
        for propname, propval in props:
            actual_propval = runCommand("svn propget --strict " + propname + " " + quoted_f)
            if propname == "svn:mime-type" and propval.startswith("text/"):
                # check for UTF-8 text files, should have
                # svn:mime-type=text/something; charset=utf8
                propval = check_utf8(f, propval, actual_propval)
            # svn reports valueless properties such as svn:executable as "*".
            if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
                print("svn propset %s '%s' %s" % (propname, propval, f))
                if fix_problems:
                    os.system("svn propset %s %s %s"
                              % (propname, _shell_quote(propval), quoted_f))
            if propname == "svn:eol-style" and propval == "native":
                # "\r" is deliberately not a raw string: the shell command
                # receives a literal carriage return as the grep/sed pattern.
                # NOTE(review): "grep -q -v CR" fails only when no line lacks
                # a CR, so files with mixed line endings are not reported and
                # empty files are; confirm whether that is intended.
                if os.system("grep -q -v \r " + quoted_f):
                    if fix_problems:
                        print(f + ": Removing DOS CR characters.")
                        os.system("sed -i s/\r// " + quoted_f)
                    else:
                        print(f + " contains DOS CR characters.")


def main(argv):
    """Check every file listed by "svn ls -R" against the auto-props rules.

    argv: the command line arguments, without the program name.

    Exits with status 2 on a command line error, and with status 0 after
    printing the usage line when -h or --help is given.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Print getopt's own message: it names the option that was actually
        # rejected, which is not necessarily the first argument.
        print(str(err))
        usage()
        sys.exit(2)
    for opt, _ in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    output = runCommand("svn ls -R ")

    for f in output.splitlines():
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue
        _check_file(f, fix_problems)


if __name__ == "__main__":
    main(sys.argv[1:])