diff options
Diffstat (limited to 'third_party/aom/test/android/get_files.py')
-rw-r--r-- | third_party/aom/test/android/get_files.py | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/third_party/aom/test/android/get_files.py b/third_party/aom/test/android/get_files.py new file mode 100644 index 000000000..bdae9a315 --- /dev/null +++ b/third_party/aom/test/android/get_files.py @@ -0,0 +1,120 @@ +# +# Copyright (c) 2016, Alliance for Open Media. All rights reserved +# +# This source code is subject to the terms of the BSD 2 Clause License and +# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +# was not distributed with this source code in the LICENSE file, you can +# obtain it at www.aomedia.org/license/software. If the Alliance for Open +# Media Patent License 1.0 was not distributed with this source code in the +# PATENTS file, you can obtain it at www.aomedia.org/license/patent. +# +# This simple script pulls test files from the webm homepage +# It is intelligent enough to only pull files if +# 1) File / test_data folder does not exist +# 2) SHA mismatch + +import pycurl +import csv +import hashlib +import re +import os.path +import time +import itertools +import sys +import getopt + +#globals +url = '' +file_list_path = '' +local_resource_path = '' + +# Helper functions: +# A simple function which returns the sha hash of a file in hex +def get_file_sha(filename): + try: + sha_hash = hashlib.sha1() + with open(filename, 'rb') as file: + buf = file.read(HASH_CHUNK) + while len(buf) > 0: + sha_hash.update(buf) + buf = file.read(HASH_CHUNK) + return sha_hash.hexdigest() + except IOError: + print "Error reading " + filename + +# Downloads a file from a url, and then checks the sha against the passed +# in sha +def download_and_check_sha(url, filename, sha): + path = os.path.join(local_resource_path, filename) + fp = open(path, "wb") + curl = pycurl.Curl() + curl.setopt(pycurl.URL, url + "/" + filename) + curl.setopt(pycurl.WRITEDATA, fp) + curl.perform() + curl.close() + fp.close() + return get_file_sha(path) == sha + +#constants +ftp_retries = 3 + +SHA_COL = 0 +NAME_COL = 1 +EXPECTED_COL = 2 +HASH_CHUNK = 65536 + +# Main script +try: + opts, args = \ + getopt.getopt(sys.argv[1:], \ + "u:i:o:", ["url=", "input_csv=", "output_dir="]) +except: + print 'get_files.py -u <url> -i <input_csv> -o <output_dir>' + sys.exit(2) + +for opt, arg in opts: + if opt == '-u': + url = arg + elif opt in ("-i", "--input_csv"): + file_list_path = os.path.join(arg) + elif opt in ("-o", "--output_dir"): + local_resource_path = os.path.join(arg) + +if len(sys.argv) != 7: + print "Expects two paths and a url!" + exit(1) + +if not os.path.isdir(local_resource_path): + os.makedirs(local_resource_path) + +file_list_csv = open(file_list_path, "rb") + +# Our 'csv' file uses multiple spaces as a delimiter, python's +# csv class only uses single character delimiters, so we convert them below +file_list_reader = csv.reader((re.sub(' +', ' ', line) \ + for line in file_list_csv), delimiter = ' ') + +file_shas = [] +file_names = [] + +for row in file_list_reader: + if len(row) != EXPECTED_COL: + continue + file_shas.append(row[SHA_COL]) + file_names.append(row[NAME_COL]) + +file_list_csv.close() + +# Download files, only if they don't already exist and have correct shas +for filename, sha in itertools.izip(file_names, file_shas): + path = os.path.join(local_resource_path, filename) + if os.path.isfile(path) \ + and get_file_sha(path) == sha: + print path + ' exists, skipping' + continue + for retry in range(0, ftp_retries): + print "Downloading " + path + if not download_and_check_sha(url, filename, sha): + print "Sha does not match, retrying..." + else: + break |