From bbcc64772580c8a979288791afa02d30bc476d2e Mon Sep 17 00:00:00 2001 From: trav90 Date: Fri, 19 Oct 2018 21:52:15 -0500 Subject: Update aom to v1.0.0 Update aom to commit id d14c5bb4f336ef1842046089849dee4a301fbbf0. --- third_party/aom/tools/all_builds.py | 72 -- third_party/aom/tools/aom_entropy_optimizer.c | 1052 ++++++++------------ third_party/aom/tools/author_first_release.sh | 15 - third_party/aom/tools/build_inspector.sh | 50 - third_party/aom/tools/dump_obu.cc | 164 +++ third_party/aom/tools/ftfy.sh | 158 --- third_party/aom/tools/obu_parser.cc | 189 ++++ third_party/aom/tools/obu_parser.h | 27 + .../aom/tools/txfm_analyzer/txfm_gen_code.cc | 580 +++++++++++ third_party/aom/tools/txfm_analyzer/txfm_graph.cc | 943 ++++++++++++++++++ third_party/aom/tools/txfm_analyzer/txfm_graph.h | 161 +++ 11 files changed, 2500 insertions(+), 911 deletions(-) delete mode 100755 third_party/aom/tools/all_builds.py delete mode 100755 third_party/aom/tools/author_first_release.sh delete mode 100755 third_party/aom/tools/build_inspector.sh create mode 100644 third_party/aom/tools/dump_obu.cc delete mode 100755 third_party/aom/tools/ftfy.sh create mode 100644 third_party/aom/tools/obu_parser.cc create mode 100644 third_party/aom/tools/obu_parser.h create mode 100644 third_party/aom/tools/txfm_analyzer/txfm_gen_code.cc create mode 100644 third_party/aom/tools/txfm_analyzer/txfm_graph.cc create mode 100644 third_party/aom/tools/txfm_analyzer/txfm_graph.h (limited to 'third_party/aom/tools') diff --git a/third_party/aom/tools/all_builds.py b/third_party/aom/tools/all_builds.py deleted file mode 100755 index d1f0c80c0..000000000 --- a/third_party/aom/tools/all_builds.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/python - -import getopt -import subprocess -import sys - -LONG_OPTIONS = ["shard=", "shards="] -BASE_COMMAND = "./configure --enable-internal-stats --enable-experimental" - -def RunCommand(command): - run = subprocess.Popen(command, shell=True) - output = 
run.communicate() - if run.returncode: - print "Non-zero return code: " + str(run.returncode) + " => exiting!" - sys.exit(1) - -def list_of_experiments(): - experiments = [] - configure_file = open("configure") - list_start = False - for line in configure_file.read().split("\n"): - if line == 'EXPERIMENT_LIST="': - list_start = True - elif line == '"': - list_start = False - elif list_start: - currently_broken = ["csm"] - experiment = line[4:] - if experiment not in currently_broken: - experiments.append(experiment) - return experiments - -def main(argv): - # Parse arguments - options = {"--shard": 0, "--shards": 1} - if "--" in argv: - opt_end_index = argv.index("--") - else: - opt_end_index = len(argv) - try: - o, _ = getopt.getopt(argv[1:opt_end_index], None, LONG_OPTIONS) - except getopt.GetoptError, err: - print str(err) - print "Usage: %s [--shard= --shards=] -- [configure flag ...]"%argv[0] - sys.exit(2) - - options.update(o) - extra_args = argv[opt_end_index + 1:] - - # Shard experiment list - shard = int(options["--shard"]) - shards = int(options["--shards"]) - experiments = list_of_experiments() - base_command = " ".join([BASE_COMMAND] + extra_args) - configs = [base_command] - configs += ["%s --enable-%s" % (base_command, e) for e in experiments] - my_configs = zip(configs, range(len(configs))) - my_configs = filter(lambda x: x[1] % shards == shard, my_configs) - my_configs = [e[0] for e in my_configs] - - # Run configs for this shard - for config in my_configs: - test_build(config) - -def test_build(configure_command): - print "\033[34m\033[47mTesting %s\033[0m" % (configure_command) - RunCommand(configure_command) - RunCommand("make clean") - RunCommand("make") - -if __name__ == "__main__": - main(sys.argv) diff --git a/third_party/aom/tools/aom_entropy_optimizer.c b/third_party/aom/tools/aom_entropy_optimizer.c index 962c1af36..551adf4f2 100644 --- a/third_party/aom/tools/aom_entropy_optimizer.c +++ b/third_party/aom/tools/aom_entropy_optimizer.c @@ 
-25,209 +25,36 @@ #include #include -#include "./aom_config.h" -#include "av1/common/entropymode.h" - -#if CONFIG_SMOOTH_HV -const aom_tree_index av1_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = { - -DC_PRED, - 2, /* 0 = DC_NODE */ - -TM_PRED, - 4, /* 1 = TM_NODE */ - -V_PRED, - 6, /* 2 = V_NODE */ - 8, - 12, /* 3 = COM_NODE */ - -H_PRED, - 10, /* 4 = H_NODE */ - -D135_PRED, - -D117_PRED, /* 5 = D135_NODE */ - -D45_PRED, - 14, /* 6 = D45_NODE */ - -D63_PRED, - 16, /* 7 = D63_NODE */ - -D153_PRED, - 18, /* 8 = D153_NODE */ - -D207_PRED, - 20, /* 9 = D207_NODE */ - -SMOOTH_PRED, - 22, /* 10 = SMOOTH_NODE */ - -SMOOTH_V_PRED, - -SMOOTH_H_PRED /* 11 = SMOOTH_V_NODE */ -}; -#else -const aom_tree_index av1_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = { - -DC_PRED, 2, /* 0 = DC_NODE */ - -TM_PRED, 4, /* 1 = TM_NODE */ - -V_PRED, 6, /* 2 = V_NODE */ - 8, 12, /* 3 = COM_NODE */ - -H_PRED, 10, /* 4 = H_NODE */ - -D135_PRED, -D117_PRED, /* 5 = D135_NODE */ - -D45_PRED, 14, /* 6 = D45_NODE */ - -D63_PRED, 16, /* 7 = D63_NODE */ - -D153_PRED, 18, /* 8 = D153_NODE */ - -D207_PRED, -SMOOTH_PRED, /* 9 = D207_NODE */ -}; -#endif // CONFIG_SMOOTH_HV + +#include "config/aom_config.h" + +#include "av1/encoder/encoder.h" #define SPACES_PER_TAB 2 +#define CDF_MAX_SIZE 16 typedef unsigned int aom_count_type; // A log file recording parsed counts static FILE *logfile; // TODO(yuec): make it a command line option -static INLINE aom_prob get_binary_prob_new(unsigned int n0, unsigned int n1) { - // The "+1" will prevent this function from generating extreme probability - // when both n0 and n1 are small - const unsigned int den = n0 + 1 + n1 + 1; - return get_prob(n0 + 1, den); -} +static void counts_to_cdf(const aom_count_type *counts, aom_cdf_prob *cdf, + int modes) { + int64_t csum[CDF_MAX_SIZE]; + assert(modes <= CDF_MAX_SIZE); -// Optimized probabilities will be stored in probs[]. 
-static unsigned int optimize_tree_probs(const aom_tree_index *tree, - unsigned int idx, - const unsigned int *counts, - aom_prob *probs) { - const int l = tree[idx]; - const unsigned int left_count = - (l <= 0) ? counts[-l] : optimize_tree_probs(tree, l, counts, probs); - const int r = tree[idx + 1]; - const unsigned int right_count = - (r <= 0) ? counts[-r] : optimize_tree_probs(tree, r, counts, probs); - probs[idx >> 1] = get_binary_prob_new(left_count, right_count); - return left_count + right_count; -} - -static int parse_stats(aom_count_type **ct_ptr, FILE *const probsfile, int tabs, - int dim_of_cts, int *cts_each_dim, - const aom_tree_index *tree, int flatten_last_dim) { - if (dim_of_cts < 1) { - fprintf(stderr, "The dimension of a counts vector should be at least 1!\n"); - return 1; - } - if (dim_of_cts == 1) { - const int total_modes = cts_each_dim[0]; - aom_count_type *counts1d = *ct_ptr; - aom_prob *probs = aom_malloc(sizeof(*probs) * (total_modes - 1)); - - if (probs == NULL) { - fprintf(stderr, "Allocating prob array failed!\n"); - return 1; - } + csum[0] = counts[0] + 1; + for (int i = 1; i < modes; ++i) csum[i] = counts[i] + 1 + csum[i - 1]; - (*ct_ptr) += total_modes; - if (tree != NULL) { - optimize_tree_probs(tree, 0, counts1d, probs); - } else { - assert(total_modes == 2); - probs[0] = get_binary_prob_new(counts1d[0], counts1d[1]); - } - if (tabs > 0) fprintf(probsfile, "%*c", tabs * SPACES_PER_TAB, ' '); - for (int k = 0; k < total_modes - 1; ++k) { - if (k == total_modes - 2) - fprintf(probsfile, " %3d ", probs[k]); - else - fprintf(probsfile, " %3d,", probs[k]); - fprintf(logfile, "%d ", counts1d[k]); - } - fprintf(logfile, "%d\n", counts1d[total_modes - 1]); - } else if (dim_of_cts == 2 && flatten_last_dim) { - assert(cts_each_dim[1] == 2); - - for (int k = 0; k < cts_each_dim[0]; ++k) { - if (k == cts_each_dim[0] - 1) { - fprintf(probsfile, " %3d ", - get_binary_prob_new((*ct_ptr)[0], (*ct_ptr)[1])); - } else { - fprintf(probsfile, " %3d,", 
- get_binary_prob_new((*ct_ptr)[0], (*ct_ptr)[1])); - } - fprintf(logfile, "%d %d\n", (*ct_ptr)[0], (*ct_ptr)[1]); - (*ct_ptr) += 2; - } - } else { - for (int k = 0; k < cts_each_dim[0]; ++k) { - int tabs_next_level; - if (dim_of_cts == 2 || (dim_of_cts == 3 && flatten_last_dim)) { - fprintf(probsfile, "%*c{", tabs * SPACES_PER_TAB, ' '); - tabs_next_level = 0; - } else { - fprintf(probsfile, "%*c{\n", tabs * SPACES_PER_TAB, ' '); - tabs_next_level = tabs + 1; - } - if (parse_stats(ct_ptr, probsfile, tabs_next_level, dim_of_cts - 1, - cts_each_dim + 1, tree, flatten_last_dim)) { - return 1; - } - if (dim_of_cts == 2 || (dim_of_cts == 3 && flatten_last_dim)) { - if (k == cts_each_dim[0] - 1) - fprintf(probsfile, "}\n"); - else - fprintf(probsfile, "},\n"); - } else { - if (k == cts_each_dim[0] - 1) - fprintf(probsfile, "%*c}\n", tabs * SPACES_PER_TAB, ' '); - else - fprintf(probsfile, "%*c},\n", tabs * SPACES_PER_TAB, ' '); - } - } - } - return 0; -} - -// This function parses the stats of a syntax, either binary or multi-symbol, -// in different contexts, and writes the optimized probability table to -// probsfile. -// counts: pointer of the first count element in counts array -// probsfile: output file -// dim_of_cts: number of dimensions of counts array -// cts_each_dim: an array storing size of each dimension of counts array -// tree: binary tree for a multi-symbol syntax, or NULL for a binary one -// flatten_last_dim: for a binary syntax, if flatten_last_dim is 0, probs in -// different contexts will be written separately, e.g., -// {{p1}, {p2}, ...}; -// otherwise will be grouped together at the second last -// dimension, i.e., -// {p1, p2, ...}. 
-// prefix: declaration header for the entropy table -static void optimize_entropy_table(aom_count_type *counts, - FILE *const probsfile, int dim_of_cts, - int *cts_each_dim, - const aom_tree_index *tree, - int flatten_last_dim, char *prefix) { - aom_count_type *ct_ptr = counts; - - assert(!flatten_last_dim || cts_each_dim[dim_of_cts - 1] == 2); - - fprintf(probsfile, "%s = {\n", prefix); - if (parse_stats(&ct_ptr, probsfile, 1, dim_of_cts, cts_each_dim, tree, - flatten_last_dim)) { - fprintf(probsfile, "Optimizer failed!\n"); - } - fprintf(probsfile, "};\n\n"); + for (int i = 0; i < modes; ++i) fprintf(logfile, "%d ", counts[i]); fprintf(logfile, "\n"); -} - -static int counts_to_cdf(const aom_count_type *counts, aom_cdf_prob *cdf, - int modes) { - int64_t *csum = aom_malloc(sizeof(*csum) * modes); - - if (csum == NULL) { - fprintf(stderr, "Allocating csum array failed!\n"); - return 1; - } - csum[0] = counts[0]; - for (int i = 1; i < modes; ++i) csum[i] = counts[i] + csum[i - 1]; int64_t sum = csum[modes - 1]; - int64_t round_shift = sum >> 1; + const int64_t round_shift = sum >> 1; for (int i = 0; i < modes; ++i) { - if (sum <= 0) - cdf[i] = CDF_PROB_TOP; - else - cdf[i] = (csum[i] * CDF_PROB_TOP + round_shift) / sum; + cdf[i] = (csum[i] * CDF_PROB_TOP + round_shift) / sum; + cdf[i] = AOMMIN(cdf[i], CDF_PROB_TOP - (modes - 1 + i) * 4); + cdf[i] = (i == 0) ? 
AOMMAX(cdf[i], 4) : AOMMAX(cdf[i], cdf[i - 1] + 4); } - return 0; } static int parse_counts_for_cdf_opt(aom_count_type **ct_ptr, @@ -237,29 +64,28 @@ static int parse_counts_for_cdf_opt(aom_count_type **ct_ptr, fprintf(stderr, "The dimension of a counts vector should be at least 1!\n"); return 1; } + const int total_modes = cts_each_dim[0]; if (dim_of_cts == 1) { - const int total_modes = cts_each_dim[0]; + assert(total_modes <= CDF_MAX_SIZE); + aom_cdf_prob cdfs[CDF_MAX_SIZE]; aom_count_type *counts1d = *ct_ptr; - aom_cdf_prob *cdfs = aom_malloc(sizeof(*cdfs) * total_modes); - - if (cdfs == NULL) { - fprintf(stderr, "Allocating cdf array failed!\n"); - return 1; - } counts_to_cdf(counts1d, cdfs, total_modes); (*ct_ptr) += total_modes; if (tabs > 0) fprintf(probsfile, "%*c", tabs * SPACES_PER_TAB, ' '); - for (int k = 0; k < total_modes; ++k) - fprintf(probsfile, " AOM_ICDF(%d),", cdfs[k]); - fprintf(probsfile, " 0 "); + fprintf(probsfile, "AOM_CDF%d(", total_modes); + for (int k = 0; k < total_modes - 1; ++k) { + fprintf(probsfile, "%d", cdfs[k]); + if (k < total_modes - 2) fprintf(probsfile, ", "); + } + fprintf(probsfile, ")"); } else { - for (int k = 0; k < cts_each_dim[0]; ++k) { + for (int k = 0; k < total_modes; ++k) { int tabs_next_level; if (dim_of_cts == 2) - fprintf(probsfile, "%*c{", tabs * SPACES_PER_TAB, ' '); + fprintf(probsfile, "%*c{ ", tabs * SPACES_PER_TAB, ' '); else fprintf(probsfile, "%*c{\n", tabs * SPACES_PER_TAB, ' '); tabs_next_level = dim_of_cts == 2 ? 
0 : tabs + 1; @@ -270,19 +96,18 @@ static int parse_counts_for_cdf_opt(aom_count_type **ct_ptr, } if (dim_of_cts == 2) { - if (k == cts_each_dim[0] - 1) - fprintf(probsfile, "}\n"); + if (k == total_modes - 1) + fprintf(probsfile, " }\n"); else - fprintf(probsfile, "},\n"); + fprintf(probsfile, " },\n"); } else { - if (k == cts_each_dim[0] - 1) + if (k == total_modes - 1) fprintf(probsfile, "%*c}\n", tabs * SPACES_PER_TAB, ' '); else fprintf(probsfile, "%*c},\n", tabs * SPACES_PER_TAB, ' '); } } } - return 0; } @@ -292,11 +117,144 @@ static void optimize_cdf_table(aom_count_type *counts, FILE *const probsfile, aom_count_type *ct_ptr = counts; fprintf(probsfile, "%s = {\n", prefix); + fprintf(logfile, "%s\n", prefix); if (parse_counts_for_cdf_opt(&ct_ptr, probsfile, 1, dim_of_cts, cts_each_dim)) { fprintf(probsfile, "Optimizer failed!\n"); } fprintf(probsfile, "};\n\n"); + fprintf(logfile, "============================\n"); +} + +static void optimize_uv_mode(aom_count_type *counts, FILE *const probsfile, + int dim_of_cts, int *cts_each_dim, char *prefix) { + aom_count_type *ct_ptr = counts; + + fprintf(probsfile, "%s = {\n", prefix); + fprintf(probsfile, "%*c{\n", SPACES_PER_TAB, ' '); + fprintf(logfile, "%s\n", prefix); + cts_each_dim[2] = UV_INTRA_MODES - 1; + for (int k = 0; k < cts_each_dim[1]; ++k) { + fprintf(probsfile, "%*c{ ", 2 * SPACES_PER_TAB, ' '); + parse_counts_for_cdf_opt(&ct_ptr, probsfile, 0, dim_of_cts - 2, + cts_each_dim + 2); + if (k + 1 == cts_each_dim[1]) { + fprintf(probsfile, " }\n"); + } else { + fprintf(probsfile, " },\n"); + } + ++ct_ptr; + } + fprintf(probsfile, "%*c},\n", SPACES_PER_TAB, ' '); + fprintf(probsfile, "%*c{\n", SPACES_PER_TAB, ' '); + cts_each_dim[2] = UV_INTRA_MODES; + parse_counts_for_cdf_opt(&ct_ptr, probsfile, 2, dim_of_cts - 1, + cts_each_dim + 1); + fprintf(probsfile, "%*c}\n", SPACES_PER_TAB, ' '); + fprintf(probsfile, "};\n\n"); + fprintf(logfile, "============================\n"); +} + +static void 
optimize_cdf_table_var_modes_2d(aom_count_type *counts, + FILE *const probsfile, + int dim_of_cts, int *cts_each_dim, + int *modes_each_ctx, char *prefix) { + aom_count_type *ct_ptr = counts; + + assert(dim_of_cts == 2); + (void)dim_of_cts; + + fprintf(probsfile, "%s = {\n", prefix); + fprintf(logfile, "%s\n", prefix); + + for (int d0_idx = 0; d0_idx < cts_each_dim[0]; ++d0_idx) { + int num_of_modes = modes_each_ctx[d0_idx]; + + if (num_of_modes > 0) { + fprintf(probsfile, "%*c{ ", SPACES_PER_TAB, ' '); + parse_counts_for_cdf_opt(&ct_ptr, probsfile, 0, 1, &num_of_modes); + ct_ptr += cts_each_dim[1] - num_of_modes; + fprintf(probsfile, " },\n"); + } else { + fprintf(probsfile, "%*c{ 0 },\n", SPACES_PER_TAB, ' '); + fprintf(logfile, "dummy cdf, no need to optimize\n"); + ct_ptr += cts_each_dim[1]; + } + } + fprintf(probsfile, "};\n\n"); + fprintf(logfile, "============================\n"); +} + +static void optimize_cdf_table_var_modes_3d(aom_count_type *counts, + FILE *const probsfile, + int dim_of_cts, int *cts_each_dim, + int *modes_each_ctx, char *prefix) { + aom_count_type *ct_ptr = counts; + + assert(dim_of_cts == 3); + (void)dim_of_cts; + + fprintf(probsfile, "%s = {\n", prefix); + fprintf(logfile, "%s\n", prefix); + + for (int d0_idx = 0; d0_idx < cts_each_dim[0]; ++d0_idx) { + fprintf(probsfile, "%*c{\n", SPACES_PER_TAB, ' '); + for (int d1_idx = 0; d1_idx < cts_each_dim[1]; ++d1_idx) { + int num_of_modes = modes_each_ctx[d0_idx]; + + if (num_of_modes > 0) { + fprintf(probsfile, "%*c{ ", 2 * SPACES_PER_TAB, ' '); + parse_counts_for_cdf_opt(&ct_ptr, probsfile, 0, 1, &num_of_modes); + ct_ptr += cts_each_dim[2] - num_of_modes; + fprintf(probsfile, " },\n"); + } else { + fprintf(probsfile, "%*c{ 0 },\n", 2 * SPACES_PER_TAB, ' '); + fprintf(logfile, "dummy cdf, no need to optimize\n"); + ct_ptr += cts_each_dim[2]; + } + } + fprintf(probsfile, "%*c},\n", SPACES_PER_TAB, ' '); + } + fprintf(probsfile, "};\n\n"); + fprintf(logfile, "============================\n"); 
+} + +static void optimize_cdf_table_var_modes_4d(aom_count_type *counts, + FILE *const probsfile, + int dim_of_cts, int *cts_each_dim, + int *modes_each_ctx, char *prefix) { + aom_count_type *ct_ptr = counts; + + assert(dim_of_cts == 4); + (void)dim_of_cts; + + fprintf(probsfile, "%s = {\n", prefix); + fprintf(logfile, "%s\n", prefix); + + for (int d0_idx = 0; d0_idx < cts_each_dim[0]; ++d0_idx) { + fprintf(probsfile, "%*c{\n", SPACES_PER_TAB, ' '); + for (int d1_idx = 0; d1_idx < cts_each_dim[1]; ++d1_idx) { + fprintf(probsfile, "%*c{\n", 2 * SPACES_PER_TAB, ' '); + for (int d2_idx = 0; d2_idx < cts_each_dim[2]; ++d2_idx) { + int num_of_modes = modes_each_ctx[d0_idx]; + + if (num_of_modes > 0) { + fprintf(probsfile, "%*c{ ", 3 * SPACES_PER_TAB, ' '); + parse_counts_for_cdf_opt(&ct_ptr, probsfile, 0, 1, &num_of_modes); + ct_ptr += cts_each_dim[3] - num_of_modes; + fprintf(probsfile, " },\n"); + } else { + fprintf(probsfile, "%*c{ 0 },\n", 3 * SPACES_PER_TAB, ' '); + fprintf(logfile, "dummy cdf, no need to optimize\n"); + ct_ptr += cts_each_dim[3]; + } + } + fprintf(probsfile, "%*c},\n", 2 * SPACES_PER_TAB, ' '); + } + fprintf(probsfile, "%*c},\n", SPACES_PER_TAB, ' '); + } + fprintf(probsfile, "};\n\n"); + fprintf(logfile, "============================\n"); } int main(int argc, const char **argv) { @@ -312,7 +270,8 @@ int main(int argc, const char **argv) { } FRAME_COUNTS fc; - fread(&fc, sizeof(FRAME_COUNTS), 1, statsfile); + const size_t bytes = fread(&fc, sizeof(FRAME_COUNTS), 1, statsfile); + if (!bytes) return 1; FILE *const probsfile = fopen("optimized_probs.c", "w"); if (probsfile == NULL) { @@ -330,67 +289,81 @@ int main(int argc, const char **argv) { int cts_each_dim[10]; /* Intra mode (keyframe luma) */ - cts_each_dim[0] = INTRA_MODES; - cts_each_dim[1] = INTRA_MODES; + cts_each_dim[0] = KF_MODE_CONTEXTS; + cts_each_dim[1] = KF_MODE_CONTEXTS; cts_each_dim[2] = INTRA_MODES; - optimize_entropy_table( - &fc.kf_y_mode[0][0][0], probsfile, 3, cts_each_dim, 
av1_intra_mode_tree, - 0, - "const aom_prob av1_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]" - "[INTRA_MODES - 1]"); - optimize_cdf_table( - &fc.kf_y_mode[0][0][0], probsfile, 3, cts_each_dim, - "const aom_cdf_prob\n" - "av1_kf_y_mode_cdf[INTRA_MODES][INTRA_MODES][CDF_SIZE(INTRA_MODES)]"); + optimize_cdf_table(&fc.kf_y_mode[0][0][0], probsfile, 3, cts_each_dim, + "const aom_cdf_prob\n" + "default_kf_y_mode_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS]" + "[CDF_SIZE(INTRA_MODES)]"); + + cts_each_dim[0] = DIRECTIONAL_MODES; + cts_each_dim[1] = 2 * MAX_ANGLE_DELTA + 1; + optimize_cdf_table(&fc.angle_delta[0][0], probsfile, 2, cts_each_dim, + "static const aom_cdf_prob default_angle_delta_cdf" + "[DIRECTIONAL_MODES][CDF_SIZE(2 * MAX_ANGLE_DELTA + 1)]"); /* Intra mode (non-keyframe luma) */ cts_each_dim[0] = BLOCK_SIZE_GROUPS; cts_each_dim[1] = INTRA_MODES; - optimize_entropy_table( - &fc.y_mode[0][0], probsfile, 2, cts_each_dim, av1_intra_mode_tree, 0, - "static const aom_prob default_if_y_probs[BLOCK_SIZE_GROUPS]" - "[INTRA_MODES - 1]"); optimize_cdf_table( &fc.y_mode[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob\n" "default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTRA_MODES)]"); /* Intra mode (chroma) */ - cts_each_dim[0] = INTRA_MODES; - cts_each_dim[1] = UV_INTRA_MODES; - optimize_entropy_table(&fc.uv_mode[0][0], probsfile, 2, cts_each_dim, - av1_intra_mode_tree, 0, - "static const aom_prob default_uv_probs[INTRA_MODES]" - "[UV_INTRA_MODES - 1]"); - optimize_cdf_table( - &fc.uv_mode[0][0], probsfile, 2, cts_each_dim, - "static const aom_cdf_prob\n" - "default_uv_mode_cdf[INTRA_MODES][CDF_SIZE(UV_INTRA_MODES)]"); + cts_each_dim[0] = CFL_ALLOWED_TYPES; + cts_each_dim[1] = INTRA_MODES; + cts_each_dim[2] = UV_INTRA_MODES; + optimize_uv_mode(&fc.uv_mode[0][0][0], probsfile, 3, cts_each_dim, + "static const aom_cdf_prob\n" + "default_uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES]" + "[CDF_SIZE(UV_INTRA_MODES)]"); - /* Partition */ + /* block partition */ 
cts_each_dim[0] = PARTITION_CONTEXTS; -#if CONFIG_EXT_PARTITION_TYPES cts_each_dim[1] = EXT_PARTITION_TYPES; - // TODO(yuec): Wrong prob for context = 0, because the old tree is used - optimize_entropy_table(&fc.partition[0][0], probsfile, 2, cts_each_dim, - av1_ext_partition_tree, 0, - "static const aom_prob default_partition_probs" - "[PARTITION_CONTEXTS][EXT_PARTITION_TYPES - 1]"); - optimize_cdf_table(&fc.partition[0][0], probsfile, 2, cts_each_dim, + int part_types_each_ctx[PARTITION_CONTEXTS] = { + 4, 4, 4, 4, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 8, 8, 8 + }; + optimize_cdf_table_var_modes_2d( + &fc.partition[0][0], probsfile, 2, cts_each_dim, part_types_each_ctx, + "static const aom_cdf_prob default_partition_cdf[PARTITION_CONTEXTS]" + "[CDF_SIZE(EXT_PARTITION_TYPES)]"); + + /* tx type */ + cts_each_dim[0] = EXT_TX_SETS_INTRA; + cts_each_dim[1] = EXT_TX_SIZES; + cts_each_dim[2] = INTRA_MODES; + cts_each_dim[3] = TX_TYPES; + int intra_ext_tx_types_each_ctx[EXT_TX_SETS_INTRA] = { 0, 7, 5 }; + optimize_cdf_table_var_modes_4d( + &fc.intra_ext_tx[0][0][0][0], probsfile, 4, cts_each_dim, + intra_ext_tx_types_each_ctx, + "static const aom_cdf_prob default_intra_ext_tx_cdf[EXT_TX_SETS_INTRA]" + "[EXT_TX_SIZES][INTRA_MODES][CDF_SIZE(TX_TYPES)]"); + + cts_each_dim[0] = EXT_TX_SETS_INTER; + cts_each_dim[1] = EXT_TX_SIZES; + cts_each_dim[2] = TX_TYPES; + int inter_ext_tx_types_each_ctx[EXT_TX_SETS_INTER] = { 0, 16, 12, 2 }; + optimize_cdf_table_var_modes_3d( + &fc.inter_ext_tx[0][0][0], probsfile, 3, cts_each_dim, + inter_ext_tx_types_each_ctx, + "static const aom_cdf_prob default_inter_ext_tx_cdf[EXT_TX_SETS_INTER]" + "[EXT_TX_SIZES][CDF_SIZE(TX_TYPES)]"); + + /* Chroma from Luma */ + cts_each_dim[0] = CFL_JOINT_SIGNS; + optimize_cdf_table(&fc.cfl_sign[0], probsfile, 1, cts_each_dim, "static const aom_cdf_prob\n" - "default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(EXT_" - "PARTITION_TYPES)]"); -#else - cts_each_dim[1] = PARTITION_TYPES; - 
optimize_entropy_table(&fc.partition[0][0], probsfile, 2, cts_each_dim, - av1_partition_tree, 0, - "static const aom_prob default_partition_probs" - "[PARTITION_CONTEXTS][PARTITION_TYPES - 1]"); - optimize_cdf_table( - &fc.partition[0][0], probsfile, 2, cts_each_dim, - "static const aom_cdf_prob\n" - "default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(PARTITION_TYPES)]"); -#endif + "default_cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)]"); + cts_each_dim[0] = CFL_ALPHA_CONTEXTS; + cts_each_dim[1] = CFL_ALPHABET_SIZE; + optimize_cdf_table(&fc.cfl_alpha[0][0], probsfile, 2, cts_each_dim, + "static const aom_cdf_prob\n" + "default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS]" + "[CDF_SIZE(CFL_ALPHABET_SIZE)]"); /* Interpolation filter */ cts_each_dim[0] = SWITCHABLE_FILTER_CONTEXTS; @@ -403,36 +376,24 @@ int main(int argc, const char **argv) { /* Motion vector referencing */ cts_each_dim[0] = NEWMV_MODE_CONTEXTS; cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.newmv_mode[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob default_newmv_prob[NEWMV_MODE_CONTEXTS]"); optimize_cdf_table(&fc.newmv_mode[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob " "default_newmv_cdf[NEWMV_MODE_CONTEXTS][CDF_SIZE(2)]"); - cts_each_dim[0] = ZEROMV_MODE_CONTEXTS; + cts_each_dim[0] = GLOBALMV_MODE_CONTEXTS; cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.zeromv_mode[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob default_zeromv_prob[ZEROMV_MODE_CONTEXTS]"); optimize_cdf_table(&fc.zeromv_mode[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob " - "default_zeromv_cdf[ZEROMV_MODE_CONTEXTS][CDF_SIZE(2)]"); + "default_zeromv_cdf[GLOBALMV_MODE_CONTEXTS][CDF_SIZE(2)]"); cts_each_dim[0] = REFMV_MODE_CONTEXTS; cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.refmv_mode[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob default_refmv_prob[REFMV_MODE_CONTEXTS]"); optimize_cdf_table(&fc.refmv_mode[0][0], probsfile, 2, 
cts_each_dim, "static const aom_cdf_prob " "default_refmv_cdf[REFMV_MODE_CONTEXTS][CDF_SIZE(2)]"); cts_each_dim[0] = DRL_MODE_CONTEXTS; cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.drl_mode[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob default_drl_prob[DRL_MODE_CONTEXTS]"); optimize_cdf_table(&fc.drl_mode[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob " "default_drl_cdf[DRL_MODE_CONTEXTS][CDF_SIZE(2)]"); @@ -441,48 +402,20 @@ int main(int argc, const char **argv) { /* New compound mode */ cts_each_dim[0] = INTER_MODE_CONTEXTS; cts_each_dim[1] = INTER_COMPOUND_MODES; - optimize_entropy_table( - &fc.inter_compound_mode[0][0], probsfile, 2, cts_each_dim, - av1_inter_compound_mode_tree, 0, - "static const aom_prob default_inter_compound_mode_probs\n" - "[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES - 1]"); optimize_cdf_table(&fc.inter_compound_mode[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob\n" "default_inter_compound_mode_cdf[INTER_MODE_CONTEXTS][CDF_" "SIZE(INTER_COMPOUND_MODES)]"); -#if CONFIG_COMPOUND_SINGLEREF - /* Compound singleref mode */ - cts_each_dim[0] = INTER_MODE_CONTEXTS; - cts_each_dim[1] = INTER_SINGLEREF_COMP_MODES; - optimize_entropy_table( - &fc.inter_singleref_comp_mode[0][0], probsfile, 2, cts_each_dim, - av1_inter_singleref_comp_mode_tree, 0, - "static const aom_prob default_inter_singleref_comp_mode_probs\n" - "[INTER_MODE_CONTEXTS][INTER_SINGLEREF_COMP_MODES - 1]"); - optimize_cdf_table(&fc.inter_singleref_comp_mode[0][0], probsfile, 2, - cts_each_dim, - "static const aom_cdf_prob\n" - "default_inter_singleref_comp_mode_cdf[INTER_MODE_" - "CONTEXTS][CDF_SIZE(INTER_SINGLEREF_COMP_MODES)]"); -#endif -#if CONFIG_INTERINTRA + /* Interintra */ cts_each_dim[0] = BLOCK_SIZE_GROUPS; cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.interintra[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob default_interintra_prob[BLOCK_SIZE_GROUPS]"); 
optimize_cdf_table(&fc.interintra[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob " "default_interintra_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(2)]"); cts_each_dim[0] = BLOCK_SIZE_GROUPS; cts_each_dim[1] = INTERINTRA_MODES; - optimize_entropy_table( - &fc.interintra_mode[0][0], probsfile, 2, cts_each_dim, - av1_interintra_mode_tree, 0, - "static const aom_prob " - "default_interintra_mode_prob[BLOCK_SIZE_GROUPS][INTERINTRA_MODES - 1]"); optimize_cdf_table(&fc.interintra_mode[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob\n" "default_interintra_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(" @@ -490,83 +423,40 @@ int main(int argc, const char **argv) { cts_each_dim[0] = BLOCK_SIZES_ALL; cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.wedge_interintra[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob default_wedge_interintra_prob[BLOCK_SIZES_ALL]"); optimize_cdf_table( &fc.wedge_interintra[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob\n" "default_wedge_interintra_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)]"); -#endif + /* Compound type */ cts_each_dim[0] = BLOCK_SIZES_ALL; - cts_each_dim[1] = COMPOUND_TYPES; - optimize_entropy_table(&fc.compound_interinter[0][0], probsfile, 2, - cts_each_dim, av1_compound_type_tree, 0, - "static const aom_prob default_compound_type_probs" - "[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1]"); - optimize_cdf_table( - &fc.compound_interinter[0][0], probsfile, 2, cts_each_dim, - "static const aom_cdf_prob\n" - "default_compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES)]"); + cts_each_dim[1] = COMPOUND_TYPES - 1; + optimize_cdf_table(&fc.compound_type[0][0], probsfile, 2, cts_each_dim, + "static const aom_cdf_prob default_compound_type_cdf" + "[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES - 1)]"); + + cts_each_dim[0] = BLOCK_SIZES_ALL; + cts_each_dim[1] = 16; + optimize_cdf_table(&fc.wedge_idx[0][0], probsfile, 2, cts_each_dim, + "static const aom_cdf_prob " + 
"default_wedge_idx_cdf[BLOCK_SIZES_ALL][CDF_SIZE(16)]"); -/* motion_var and warped_motion experiments */ -#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION + /* motion_var and warped_motion experiments */ cts_each_dim[0] = BLOCK_SIZES_ALL; cts_each_dim[1] = MOTION_MODES; - optimize_entropy_table( - &fc.motion_mode[0][0], probsfile, 2, cts_each_dim, av1_motion_mode_tree, - 0, - "static const aom_prob default_motion_mode_prob[BLOCK_SIZES]" - "[MOTION_MODES - 1]"); optimize_cdf_table( &fc.motion_mode[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob\n" "default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)]"); -#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION cts_each_dim[0] = BLOCK_SIZES_ALL; cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.obmc[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob default_obmc_prob[BLOCK_SIZES_ALL]"); optimize_cdf_table(&fc.obmc[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob " "default_obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)]"); -#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION -#if CONFIG_NCOBMC_ADAPT_WEIGHT - cts_each_dim[0] = ADAPT_OVERLAP_BLOCKS; - cts_each_dim[1] = MAX_NCOBMC_MODES; - optimize_entropy_table( - &fc.ncobmc_mode[0][0], probsfile, 2, cts_each_dim, av1_ncobmc_mode_tree, - 0, - "static const aom_prob default_ncobmc_mode_prob[ADAPT_OVERLAP_BLOCKS]" - "[MAX_NCOBMC_MODES - 1]"); - optimize_cdf_table(&fc.ncobmc_mode[0][0], probsfile, 2, cts_each_dim, - "static const aom_cdf_prob\n" - "default_ncobmc_mode_cdf[ADAPT_OVERLAP_BLOCKS]" - "[CDF_SIZE(MAX_NCOBMC_MODES)]"); -#if CONFIG_WARPED_MOTION - cts_each_dim[0] = BLOCK_SIZES_ALL; - cts_each_dim[1] = OBMC_FAMILY_MODES; - optimize_entropy_table( - &fc.ncobmc[0][0], probsfile, 2, cts_each_dim, av1_ncobmc_tree, 0, - "static const aom_prob default_ncobmc_prob[BLOCK_SIZES_ALL]" - "[OBMC_FAMILY_MODES - 1]"); - optimize_cdf_table(&fc.ncobmc[0][0], probsfile, 2, cts_each_dim, - "static const aom_cdf_prob\n" - 
"default_ncobmc_cdf[BLOCK_SIZES_ALL]" - "[CDF_SIZE(OBMC_FAMILY_MODES)]"); -#endif -#endif // CONFIG_NCOBMC_ADAPT_WEIGHT -#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION /* Intra/inter flag */ cts_each_dim[0] = INTRA_INTER_CONTEXTS; cts_each_dim[1] = 2; - optimize_entropy_table(&fc.intra_inter[0][0], probsfile, 2, cts_each_dim, - NULL, 1, - "static const aom_prob default_intra_inter_p" - "[INTRA_INTER_CONTEXTS]"); optimize_cdf_table( &fc.intra_inter[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob\n" @@ -575,22 +465,14 @@ int main(int argc, const char **argv) { /* Single/comp ref flag */ cts_each_dim[0] = COMP_INTER_CONTEXTS; cts_each_dim[1] = 2; - optimize_entropy_table(&fc.comp_inter[0][0], probsfile, 2, cts_each_dim, NULL, - 1, - "static const aom_prob default_comp_inter_p" - "[COMP_INTER_CONTEXTS]"); optimize_cdf_table( &fc.comp_inter[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob\n" "default_comp_inter_cdf[COMP_INTER_CONTEXTS][CDF_SIZE(2)]"); -/* ext_comp_refs experiment */ -#if CONFIG_EXT_COMP_REFS + /* ext_comp_refs experiment */ cts_each_dim[0] = COMP_REF_TYPE_CONTEXTS; cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.comp_ref_type[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob default_comp_ref_type_p[COMP_REF_TYPE_CONTEXTS]"); optimize_cdf_table( &fc.comp_ref_type[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob\n" @@ -599,37 +481,24 @@ int main(int argc, const char **argv) { cts_each_dim[0] = UNI_COMP_REF_CONTEXTS; cts_each_dim[1] = UNIDIR_COMP_REFS - 1; cts_each_dim[2] = 2; - optimize_entropy_table( - &fc.uni_comp_ref[0][0][0], probsfile, 3, cts_each_dim, NULL, 1, - "static const aom_prob\n" - "default_uni_comp_ref_p[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1]"); optimize_cdf_table(&fc.uni_comp_ref[0][0][0], probsfile, 3, cts_each_dim, "static const aom_cdf_prob\n" "default_uni_comp_ref_cdf[UNI_COMP_REF_CONTEXTS][UNIDIR_" "COMP_REFS - 1][CDF_SIZE(2)]"); -#endif /* Reference frame 
(single ref) */ cts_each_dim[0] = REF_CONTEXTS; cts_each_dim[1] = SINGLE_REFS - 1; cts_each_dim[2] = 2; - optimize_entropy_table( - &fc.single_ref[0][0][0], probsfile, 3, cts_each_dim, NULL, 1, - "static const aom_prob default_single_ref_p[REF_CONTEXTS]" - "[SINGLE_REFS - 1]"); optimize_cdf_table( &fc.single_ref[0][0][0], probsfile, 3, cts_each_dim, "static const aom_cdf_prob\n" "default_single_ref_cdf[REF_CONTEXTS][SINGLE_REFS - 1][CDF_SIZE(2)]"); -#if CONFIG_EXT_REFS /* ext_refs experiment */ cts_each_dim[0] = REF_CONTEXTS; cts_each_dim[1] = FWD_REFS - 1; cts_each_dim[2] = 2; - optimize_entropy_table( - &fc.comp_ref[0][0][0], probsfile, 3, cts_each_dim, NULL, 1, - "static const aom_prob default_comp_ref_p[REF_CONTEXTS][FWD_REFS - 1]"); optimize_cdf_table( &fc.comp_ref[0][0][0], probsfile, 3, cts_each_dim, "static const aom_cdf_prob\n" @@ -638,297 +507,248 @@ int main(int argc, const char **argv) { cts_each_dim[0] = REF_CONTEXTS; cts_each_dim[1] = BWD_REFS - 1; cts_each_dim[2] = 2; - optimize_entropy_table(&fc.comp_bwdref[0][0][0], probsfile, 3, cts_each_dim, - NULL, 1, - "static const aom_prob " - "default_comp_bwdref_p[REF_CONTEXTS][BWD_REFS - 1]"); optimize_cdf_table( &fc.comp_bwdref[0][0][0], probsfile, 3, cts_each_dim, "static const aom_cdf_prob\n" "default_comp_bwdref_cdf[REF_CONTEXTS][BWD_REFS - 1][CDF_SIZE(2)]"); -#else - /* Reference frame (compound refs) */ - cts_each_dim[0] = REF_CONTEXTS; - cts_each_dim[1] = COMP_REFS - 1; + + /* palette */ + cts_each_dim[0] = PALATTE_BSIZE_CTXS; + cts_each_dim[1] = PALETTE_SIZES; + optimize_cdf_table(&fc.palette_y_size[0][0], probsfile, 2, cts_each_dim, + "const aom_cdf_prob default_palette_y_size_cdf" + "[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)]"); + + cts_each_dim[0] = PALATTE_BSIZE_CTXS; + cts_each_dim[1] = PALETTE_SIZES; + optimize_cdf_table(&fc.palette_uv_size[0][0], probsfile, 2, cts_each_dim, + "const aom_cdf_prob default_palette_uv_size_cdf" + "[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)]"); + + 
cts_each_dim[0] = PALATTE_BSIZE_CTXS; + cts_each_dim[1] = PALETTE_Y_MODE_CONTEXTS; cts_each_dim[2] = 2; - optimize_entropy_table( - &fc.comp_ref[0][0][0], probsfile, 3, cts_each_dim, NULL, 1, - "static const aom_prob default_comp_ref_p[REF_CONTEXTS]" - "[COMP_REFS - 1]"); - optimize_cdf_table( - &fc.comp_ref[0][0][0], probsfile, 3, cts_each_dim, - "static const aom_cdf_prob\n" - "default_comp_ref_cdf[REF_CONTEXTS][COMP_REFS - 1][CDF_SIZE(2)]"); -#endif // CONFIG_EXT_REFS + optimize_cdf_table(&fc.palette_y_mode[0][0][0], probsfile, 3, cts_each_dim, + "const aom_cdf_prob default_palette_y_mode_cdf" + "[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS]" + "[CDF_SIZE(2)]"); -/* Compound single ref inter mode */ -#if CONFIG_COMPOUND_SINGLEREF - cts_each_dim[0] = COMP_INTER_MODE_CONTEXTS; + cts_each_dim[0] = PALETTE_UV_MODE_CONTEXTS; cts_each_dim[1] = 2; - optimize_entropy_table(&fc.comp_inter_mode[0][0], probsfile, 2, cts_each_dim, - NULL, 1, - "static const aom_prob " - "default_comp_inter_mode_p[COMP_INTER_MODE_CONTEXTS]"); - optimize_cdf_table(&fc.comp_inter_mode[0][0], probsfile, 2, cts_each_dim, - "static const aom_cdf_prob " - "default_comp_inter_mode_cdf[COMP_INTER_MODE_CONTEXTS][" - "CDF_SIZE(2)]"); -#endif - -/* Transform size */ -#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) - cts_each_dim[0] = 2; - optimize_entropy_table(&fc.quarter_tx_size[0], probsfile, 1, cts_each_dim, - NULL, 1, - "static const aom_prob default_quarter_tx_size_prob"); - optimize_cdf_table( - &fc.quarter_tx_size[0], probsfile, 1, cts_each_dim, - "static const aom_cdf_prob default_quarter_tx_size_cdf[CDF_SIZE(2)]"); -#endif -#if CONFIG_VAR_TX + optimize_cdf_table(&fc.palette_uv_mode[0][0], probsfile, 2, cts_each_dim, + "const aom_cdf_prob default_palette_uv_mode_cdf" + "[PALETTE_UV_MODE_CONTEXTS][CDF_SIZE(2)]"); + + cts_each_dim[0] = PALETTE_SIZES; + cts_each_dim[1] = PALETTE_COLOR_INDEX_CONTEXTS; + cts_each_dim[2] = PALETTE_COLORS; + int 
palette_color_indexes_each_ctx[PALETTE_SIZES] = { 2, 3, 4, 5, 6, 7, 8 }; + optimize_cdf_table_var_modes_3d( + &fc.palette_y_color_index[0][0][0], probsfile, 3, cts_each_dim, + palette_color_indexes_each_ctx, + "const aom_cdf_prob default_palette_y_color_index_cdf[PALETTE_SIZES]" + "[PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)]"); + + cts_each_dim[0] = PALETTE_SIZES; + cts_each_dim[1] = PALETTE_COLOR_INDEX_CONTEXTS; + cts_each_dim[2] = PALETTE_COLORS; + optimize_cdf_table_var_modes_3d( + &fc.palette_uv_color_index[0][0][0], probsfile, 3, cts_each_dim, + palette_color_indexes_each_ctx, + "const aom_cdf_prob default_palette_uv_color_index_cdf[PALETTE_SIZES]" + "[PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)]"); + + /* Transform size */ cts_each_dim[0] = TXFM_PARTITION_CONTEXTS; cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.txfm_partition[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob " - "default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS]"); optimize_cdf_table( &fc.txfm_partition[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob\n" "default_txfm_partition_cdf[TXFM_PARTITION_CONTEXTS][CDF_SIZE(2)]"); -#endif /* Skip flag */ cts_each_dim[0] = SKIP_CONTEXTS; cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.skip[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob default_skip_probs[SKIP_CONTEXTS]"); optimize_cdf_table(&fc.skip[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob " "default_skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)]"); -/* intrabc experiment */ -#if CONFIG_INTRABC + /* Skip mode flag */ + cts_each_dim[0] = SKIP_MODE_CONTEXTS; + cts_each_dim[1] = 2; + optimize_cdf_table(&fc.skip_mode[0][0], probsfile, 2, cts_each_dim, + "static const aom_cdf_prob " + "default_skip_mode_cdfs[SKIP_MODE_CONTEXTS][CDF_SIZE(2)]"); + + /* joint compound flag */ + cts_each_dim[0] = COMP_INDEX_CONTEXTS; + cts_each_dim[1] = 2; + optimize_cdf_table(&fc.compound_index[0][0], probsfile, 2, 
cts_each_dim, + "static const aom_cdf_prob default_compound_idx_cdfs" + "[COMP_INDEX_CONTEXTS][CDF_SIZE(2)]"); + + cts_each_dim[0] = COMP_GROUP_IDX_CONTEXTS; + cts_each_dim[1] = 2; + optimize_cdf_table(&fc.comp_group_idx[0][0], probsfile, 2, cts_each_dim, + "static const aom_cdf_prob default_comp_group_idx_cdfs" + "[COMP_GROUP_IDX_CONTEXTS][CDF_SIZE(2)]"); + + /* intrabc */ cts_each_dim[0] = 2; - optimize_entropy_table(&fc.intrabc[0], probsfile, 1, cts_each_dim, NULL, 1, - "INTRABC_PROB_DEFAULT"); optimize_cdf_table( &fc.intrabc[0], probsfile, 1, cts_each_dim, "static const aom_cdf_prob default_intrabc_cdf[CDF_SIZE(2)]"); -#endif - /* delta_q */ - cts_each_dim[0] = DELTA_Q_PROBS; - cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.delta_q[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob default_delta_q_probs[DELTA_Q_PROBS]"); -#if CONFIG_EXT_DELTA_Q - cts_each_dim[0] = DELTA_LF_PROBS; + /* filter_intra experiment */ + cts_each_dim[0] = FILTER_INTRA_MODES; + optimize_cdf_table( + &fc.filter_intra_mode[0], probsfile, 1, cts_each_dim, + "static const aom_cdf_prob " + "default_filter_intra_mode_cdf[CDF_SIZE(FILTER_INTRA_MODES)]"); + + cts_each_dim[0] = BLOCK_SIZES_ALL; cts_each_dim[1] = 2; - optimize_entropy_table( - &fc.delta_lf[0][0], probsfile, 2, cts_each_dim, NULL, 1, - "static const aom_prob default_delta_lf_probs[DELTA_LF_PROBS]"); -#endif - -/* Transform type */ -#if CONFIG_EXT_TX -// TODO(yuec): different trees are used depending on selected ext tx set -#else - // TODO(yuec): intra_ext_tx use different trees depending on the context - cts_each_dim[0] = EXT_TX_SIZES; - cts_each_dim[1] = TX_TYPES; - optimize_entropy_table(&fc.inter_ext_tx[0][0], probsfile, 2, cts_each_dim, - av1_ext_tx_tree, 0, - "static const aom_prob default_inter_ext_tx_prob" - "[EXT_TX_SIZES][TX_TYPES - 1]"); - optimize_cdf_table(&fc.inter_ext_tx[0][0], probsfile, 2, cts_each_dim, + optimize_cdf_table(&fc.filter_intra[0][0], probsfile, 2, cts_each_dim, "static const 
aom_cdf_prob " - "default_inter_ext_tx_prob[EXT_TX_SIZES][CDF_SIZE(TX_" - "TYPES)]"); -#endif + "default_filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE(2)]"); -/* supertx experiment */ -#if CONFIG_SUPERTX - cts_each_dim[0] = PARTITION_SUPERTX_CONTEXTS; - cts_each_dim[1] = TX_SIZES; - cts_each_dim[2] = 2; - optimize_entropy_table( - &fc.supertx[0][0][0], probsfile, 3, cts_each_dim, NULL, 1, - "static const aom_prob\n" - "default_supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES]"); - optimize_cdf_table(&fc.supertx[0][0][0], probsfile, 3, cts_each_dim, - "static const aom_cdf_prob " - "default_supertx_cdf[PARTITION_SUPERTX_CONTEXTS][TX_SIZES]" + /* restoration type */ + cts_each_dim[0] = RESTORE_SWITCHABLE_TYPES; + optimize_cdf_table(&fc.switchable_restore[0], probsfile, 1, cts_each_dim, + "static const aom_cdf_prob default_switchable_restore_cdf" + "[CDF_SIZE(RESTORE_SWITCHABLE_TYPES)]"); + + cts_each_dim[0] = 2; + optimize_cdf_table(&fc.wiener_restore[0], probsfile, 1, cts_each_dim, + "static const aom_cdf_prob default_wiener_restore_cdf" + "[CDF_SIZE(2)]"); + + cts_each_dim[0] = 2; + optimize_cdf_table(&fc.sgrproj_restore[0], probsfile, 1, cts_each_dim, + "static const aom_cdf_prob default_sgrproj_restore_cdf" "[CDF_SIZE(2)]"); -#endif - -/* ext_intra experiment */ -#if CONFIG_EXT_INTRA -#if CONFIG_INTRA_INTERP - cts_each_dim[0] = INTRA_FILTERS + 1; - cts_each_dim[1] = INTRA_FILTERS; - optimize_entropy_table( - &fc.intra_filter[0][0], probsfile, 2, cts_each_dim, av1_intra_filter_tree, - 0, - "static const aom_prob\n" - "default_intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1]"); - optimize_cdf_table(&fc.intra_filter[0][0], probsfile, 2, cts_each_dim, + + /* intra tx size */ + cts_each_dim[0] = MAX_TX_CATS; + cts_each_dim[1] = TX_SIZE_CONTEXTS; + cts_each_dim[2] = MAX_TX_DEPTH + 1; + int intra_tx_sizes_each_ctx[MAX_TX_CATS] = { 2, 3, 3, 3 }; + optimize_cdf_table_var_modes_3d( + &fc.intra_tx_size[0][0][0], probsfile, 3, cts_each_dim, + intra_tx_sizes_each_ctx, 
+ "static const aom_cdf_prob default_tx_size_cdf" + "[MAX_TX_CATS][TX_SIZE_CONTEXTS][CDF_SIZE(MAX_TX_DEPTH + 1)]"); + + /* transform coding */ + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = TX_SIZES; + cts_each_dim[2] = TXB_SKIP_CONTEXTS; + cts_each_dim[3] = 2; + optimize_cdf_table(&fc.txb_skip[0][0][0][0], probsfile, 4, cts_each_dim, "static const aom_cdf_prob " - "default_intra_filter_cdf[INTRA_FILTERS + " - "1][CDF_SIZE(INTRA_FILTERS)]"); -#endif -#endif - -/* filter_intra experiment */ -#if CONFIG_FILTER_INTRA - cts_each_dim[0] = PLANE_TYPES; - cts_each_dim[1] = 2; - optimize_entropy_table(&fc.filter_intra[0][0], probsfile, 2, cts_each_dim, - NULL, 1, - "static const aom_prob default_filter_intra_probs[2]"); - optimize_cdf_table( - &fc.filter_intra[0][0], probsfile, 2, cts_each_dim, - "static const aom_cdf_prob default_filter_intra_cdf[2][CDF_SIZE(2)]"); -#endif + "av1_default_txb_skip_cdfs[TOKEN_CDF_Q_CTXS][TX_SIZES]" + "[TXB_SKIP_CONTEXTS][CDF_SIZE(2)]"); -#if CONFIG_LV_MAP - cts_each_dim[0] = TX_SIZES; - cts_each_dim[1] = PLANE_TYPES; - cts_each_dim[2] = NUM_BASE_LEVELS; - cts_each_dim[3] = COEFF_BASE_CONTEXTS; + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = TX_SIZES; + cts_each_dim[2] = PLANE_TYPES; + cts_each_dim[3] = EOB_COEF_CONTEXTS; cts_each_dim[4] = 2; - optimize_entropy_table(&fc.coeff_base[0][0][0][0][0], probsfile, 5, - cts_each_dim, NULL, 1, - "static const aom_prob " - "default_coeff_base[TX_SIZES][PLANE_TYPES][NUM_BASE_" - "LEVELS][COEFF_BASE_CONTEXTS]"); - optimize_cdf_table(&fc.coeff_base[0][0][0][0][0], probsfile, 5, cts_each_dim, - "static const aom_cdf_prob " - "default_coeff_base_cdf[TX_SIZES][PLANE_TYPES][NUM_BASE_" - "LEVELS][COEFF_BASE_CONTEXTS][CDF_SIZE(2)]"); + optimize_cdf_table( + &fc.eob_extra[0][0][0][0][0], probsfile, 5, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_extra_cdfs " + "[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS]" + "[CDF_SIZE(2)]"); - cts_each_dim[0] = TX_SIZES; + 
cts_each_dim[0] = TOKEN_CDF_Q_CTXS; cts_each_dim[1] = PLANE_TYPES; - cts_each_dim[2] = SIG_COEF_CONTEXTS; - cts_each_dim[3] = 2; - optimize_entropy_table( - &fc.nz_map[0][0][0][0], probsfile, 4, cts_each_dim, NULL, 1, - "static const aom_prob " - "default_nz_map[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]"); - optimize_cdf_table(&fc.nz_map[0][0][0][0], probsfile, 4, cts_each_dim, - "static const aom_cdf_prob " - "default_nz_map_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_" - "CONTEXTS][CDF_SIZE(2)]"); + cts_each_dim[2] = 2; + cts_each_dim[3] = 5; + optimize_cdf_table(&fc.eob_multi16[0][0][0][0], probsfile, 4, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi16_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(5)]"); - cts_each_dim[0] = TX_SIZES; + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; cts_each_dim[1] = PLANE_TYPES; - cts_each_dim[2] = EOB_COEF_CONTEXTS; - cts_each_dim[3] = 2; - optimize_entropy_table( - &fc.eob_flag[0][0][0][0], probsfile, 4, cts_each_dim, NULL, 1, - "static const aom_prob " - "default_eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS]"); - optimize_cdf_table(&fc.eob_flag[0][0][0][0], probsfile, 4, cts_each_dim, - "static const aom_cdf_prob " - "default_eob_flag_cdf[TX_SIZES][PLANE_TYPES][EOB_COEF_" - "CONTEXTS][CDF_SIZE(2)]"); + cts_each_dim[2] = 2; + cts_each_dim[3] = 6; + optimize_cdf_table(&fc.eob_multi32[0][0][0][0], probsfile, 4, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi32_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(6)]"); - cts_each_dim[0] = TX_SIZES; + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; cts_each_dim[1] = PLANE_TYPES; - cts_each_dim[2] = LEVEL_CONTEXTS; - cts_each_dim[3] = 2; - optimize_entropy_table( - &fc.coeff_lps[0][0][0][0], probsfile, 4, cts_each_dim, NULL, 1, - "static const aom_prob " - "default_coeff_lps[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]"); - optimize_cdf_table(&fc.coeff_lps[0][0][0][0], probsfile, 4, cts_each_dim, - "static const aom_cdf_prob " - 
"default_coeff_lps_cdf[TX_SIZES][PLANE_TYPES][LEVEL_" - "CONTEXTS][CDF_SIZE(2)]"); + cts_each_dim[2] = 2; + cts_each_dim[3] = 7; + optimize_cdf_table(&fc.eob_multi64[0][0][0][0], probsfile, 4, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi64_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(7)]"); -#if BR_NODE - cts_each_dim[0] = TX_SIZES; + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; cts_each_dim[1] = PLANE_TYPES; - cts_each_dim[2] = BASE_RANGE_SETS; - cts_each_dim[3] = LEVEL_CONTEXTS; - cts_each_dim[4] = 2; - optimize_entropy_table(&fc.coeff_br[0][0][0][0][0], probsfile, 5, - cts_each_dim, NULL, 1, - "static const aom_prob " - "default_coeff_br[TX_SIZES][PLANE_TYPES][BASE_RANGE_" - "SETS][LEVEL_CONTEXTS]"); - optimize_cdf_table(&fc.coeff_br[0][0][0][0][0], probsfile, 5, cts_each_dim, - "static const aom_cdf_prob " - "default_coeff_br_cdf[TX_SIZES][PLANE_TYPES][BASE_RANGE_" - "SETS][LEVEL_CONTEXTS][CDF_SIZE(2)]"); -#endif // BR_NODE + cts_each_dim[2] = 2; + cts_each_dim[3] = 8; + optimize_cdf_table(&fc.eob_multi128[0][0][0][0], probsfile, 4, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi128_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(8)]"); -#if CONFIG_CTX1D - cts_each_dim[0] = TX_SIZES; + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; cts_each_dim[1] = PLANE_TYPES; - cts_each_dim[2] = TX_CLASSES; - cts_each_dim[3] = 2; - optimize_entropy_table(&fc.eob_mode[0][0][0][0], probsfile, 4, cts_each_dim, - NULL, 1, - "static const aom_prob " - "default_eob_mode[TX_SIZES][PLANE_TYPES][TX_CLASSES]"); - optimize_cdf_table(&fc.eob_mode[0][0][0][0], probsfile, 4, cts_each_dim, - "static const aom_cdf_prob " - "default_eob_mode_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES][" - "CDF_SIZE(2)]"); + cts_each_dim[2] = 2; + cts_each_dim[3] = 9; + optimize_cdf_table(&fc.eob_multi256[0][0][0][0], probsfile, 4, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi256_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(9)]"); - cts_each_dim[0] = 
TX_SIZES; + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; cts_each_dim[1] = PLANE_TYPES; - cts_each_dim[2] = TX_CLASSES; - cts_each_dim[3] = EMPTY_LINE_CONTEXTS; - cts_each_dim[4] = 2; - optimize_entropy_table(&fc.empty_line[0][0][0][0][0], probsfile, 5, - cts_each_dim, NULL, 1, - "static const aom_prob " - "default_empty_line[TX_SIZES][PLANE_TYPES][TX_CLASSES]" - "[EMPTY_LINE_CONTEXTS]"); - optimize_cdf_table(&fc.empty_line[0][0][0][0][0], probsfile, 5, cts_each_dim, - "static const aom_cdf_prob " - "default_empty_line_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES]" - "[EMPTY_LINE_CONTEXTS][CDF_SIZE(2)]"); + cts_each_dim[2] = 2; + cts_each_dim[3] = 10; + optimize_cdf_table(&fc.eob_multi512[0][0][0][0], probsfile, 4, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi512_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(10)]"); - cts_each_dim[0] = TX_SIZES; + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; cts_each_dim[1] = PLANE_TYPES; - cts_each_dim[2] = TX_CLASSES; - cts_each_dim[3] = HV_EOB_CONTEXTS; - cts_each_dim[4] = 2; - optimize_entropy_table( - &fc.hv_eob[0][0][0][0][0], probsfile, 5, cts_each_dim, NULL, 1, - "static const aom_prob " - "default_hv_eob[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_EOB_CONTEXTS]"); - optimize_cdf_table(&fc.hv_eob[0][0][0][0][0], probsfile, 5, cts_each_dim, + cts_each_dim[2] = 2; + cts_each_dim[3] = 11; + optimize_cdf_table(&fc.eob_multi1024[0][0][0][0], probsfile, 4, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi1024_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(11)]"); + + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = TX_SIZES; + cts_each_dim[2] = PLANE_TYPES; + cts_each_dim[3] = LEVEL_CONTEXTS; + cts_each_dim[4] = BR_CDF_SIZE; + optimize_cdf_table(&fc.coeff_lps_multi[0][0][0][0][0], probsfile, 5, + cts_each_dim, "static const aom_cdf_prob " - "default_hv_eob_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_" - "EOB_CONTEXTS][CDF_SIZE(2)]"); -#endif // CONFIG_CTX1D -#endif // CONFIG_LV_MAP - -/* lgt_from_pred 
experiment */ -#if CONFIG_LGT_FROM_PRED - cts_each_dim[0] = LGT_SIZES; - if (LGT_FROM_PRED_INTRA) { - cts_each_dim[1] = INTRA_MODES; - cts_each_dim[2] = 2; - optimize_entropy_table(&fc.intra_lgt[0][0][0], probsfile, 3, cts_each_dim, - NULL, 1, - "static const aom_prob default_intra_lgt_prob" - "[LGT_SIZES][INTRA_MODES][2]"); - } - if (LGT_FROM_PRED_INTER) { - cts_each_dim[1] = 2; - optimize_entropy_table(&fc.inter_lgt[0][0], probsfile, 2, cts_each_dim, - NULL, 1, - "static const aom_prob default_inter_lgt_prob" - "[LGT_SIZES][2]"); - } -#endif // CONFIG_LGT_FROM_PRED + "av1_default_coeff_lps_multi_cdfs[TOKEN_CDF_Q_CTXS]" + "[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]" + "[CDF_SIZE(BR_CDF_SIZE)]"); + + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = TX_SIZES; + cts_each_dim[2] = PLANE_TYPES; + cts_each_dim[3] = SIG_COEF_CONTEXTS; + cts_each_dim[4] = NUM_BASE_LEVELS + 2; + optimize_cdf_table( + &fc.coeff_base_multi[0][0][0][0][0], probsfile, 5, cts_each_dim, + "static const aom_cdf_prob av1_default_coeff_base_multi_cdfs" + "[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]" + "[CDF_SIZE(NUM_BASE_LEVELS + 2)]"); + + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = TX_SIZES; + cts_each_dim[2] = PLANE_TYPES; + cts_each_dim[3] = SIG_COEF_CONTEXTS_EOB; + cts_each_dim[4] = NUM_BASE_LEVELS + 1; + optimize_cdf_table( + &fc.coeff_base_eob_multi[0][0][0][0][0], probsfile, 5, cts_each_dim, + "static const aom_cdf_prob av1_default_coeff_base_eob_multi_cdfs" + "[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS_EOB]" + "[CDF_SIZE(NUM_BASE_LEVELS + 1)]"); fclose(statsfile); fclose(logfile); diff --git a/third_party/aom/tools/author_first_release.sh b/third_party/aom/tools/author_first_release.sh deleted file mode 100755 index 7b0b79721..000000000 --- a/third_party/aom/tools/author_first_release.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -## -## List the release each author first contributed to. 
-## -## Usage: author_first_release.sh [TAGS] -## -## If the TAGS arguments are unspecified, all tags reported by `git tag` -## will be considered. -## -tags=${@:-$(git tag)} -for tag in $tags; do - git shortlog -n -e -s $tag | - cut -f2- | - awk "{print \"${tag#v}\t\"\$0}" -done | sort -k2 | uniq -f2 diff --git a/third_party/aom/tools/build_inspector.sh b/third_party/aom/tools/build_inspector.sh deleted file mode 100755 index 25e0de62f..000000000 --- a/third_party/aom/tools/build_inspector.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/sh -## -## Copyright (c) 2017, Alliance for Open Media. All rights reserved -## -## This source code is subject to the terms of the BSD 2 Clause License and -## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License -## was not distributed with this source code in the LICENSE file, you can -## obtain it at www.aomedia.org/license/software. If the Alliance for Open -## Media Patent License 1.0 was not distributed with this source code in the -## PATENTS file, you can obtain it at www.aomedia.org/license/patent. -## - -if ! [ -x "$(command -v emcc)" ] \ - || ! [ -x "$(command -v emconfigure)" ] \ - || ! [ -x "$(command -v emmake)" ]; then - cat << EOF >& 2 -Emscripten SDK is not available (emcc, emconfigure or emmake is missing). -Install it from -https://kripken.github.io/emscripten-site/docs/getting_started/downloads.html -and try again. -EOF - exit 1 -fi - -echo 'Building JS Inspector' -if [ ! -d ".inspect" ]; then - mkdir .inspect - cd .inspect && emconfigure ../../configure \ - --disable-multithread \ - --disable-runtime-cpu-detect \ - --target=generic-gnu \ - --enable-accounting \ - --disable-docs \ - --disable-unit-tests \ - --enable-inspection \ - --enable-highbitdepth \ - --extra-cflags="-D_POSIX_SOURCE" - cd .. 
-fi - -cd .inspect -emmake make -j 8 -cp examples/inspect inspect.bc -emcc -O3 inspect.bc -o inspect.js \ - -s TOTAL_MEMORY=134217728 \ - -s MODULARIZE=1 \ - -s EXPORT_NAME="'DecoderModule'" \ - --post-js "../inspect-post.js" \ - --memory-init-file 0 -cp inspect.js ../inspect.js diff --git a/third_party/aom/tools/dump_obu.cc b/third_party/aom/tools/dump_obu.cc new file mode 100644 index 000000000..30ee5e7a1 --- /dev/null +++ b/third_party/aom/tools/dump_obu.cc @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include +#include + +#include +#include + +#include "config/aom_config.h" + +#include "common/ivfdec.h" +#include "common/obudec.h" +#include "common/tools_common.h" +#include "common/webmdec.h" +#include "tools/obu_parser.h" + +namespace { + +const size_t kInitialBufferSize = 100 * 1024; + +struct InputContext { + InputContext() = default; + ~InputContext() { free(unit_buffer); } + + void Init() { + memset(avx_ctx, 0, sizeof(*avx_ctx)); + memset(obu_ctx, 0, sizeof(*obu_ctx)); + obu_ctx->avx_ctx = avx_ctx; +#if CONFIG_WEBM_IO + memset(webm_ctx, 0, sizeof(*webm_ctx)); +#endif + } + + AvxInputContext *avx_ctx = nullptr; + ObuDecInputContext *obu_ctx = nullptr; +#if CONFIG_WEBM_IO + WebmInputContext *webm_ctx = nullptr; +#endif + uint8_t *unit_buffer = nullptr; + size_t unit_buffer_size = 0; +}; + +void PrintUsage() { + printf("Libaom OBU dump.\nUsage: dump_obu \n"); +} + +VideoFileType GetFileType(InputContext *ctx) { + if (file_is_ivf(ctx->avx_ctx)) return FILE_TYPE_IVF; + if (file_is_obu(ctx->obu_ctx)) return FILE_TYPE_OBU; +#if CONFIG_WEBM_IO + if (file_is_webm(ctx->webm_ctx, ctx->avx_ctx)) return FILE_TYPE_WEBM; +#endif + return FILE_TYPE_RAW; +} + +bool ReadTemporalUnit(InputContext *ctx, size_t *unit_size) { + const VideoFileType file_type = ctx->avx_ctx->file_type; + switch (file_type) { + case FILE_TYPE_IVF: { + if (ivf_read_frame(ctx->avx_ctx->file, &ctx->unit_buffer, unit_size, + &ctx->unit_buffer_size, NULL)) { + return false; + } + break; + } + case FILE_TYPE_OBU: { + if (obudec_read_temporal_unit(ctx->obu_ctx, &ctx->unit_buffer, unit_size, + &ctx->unit_buffer_size)) { + return false; + } + break; + } +#if CONFIG_WEBM_IO + case FILE_TYPE_WEBM: { + if (webm_read_frame(ctx->webm_ctx, &ctx->unit_buffer, unit_size, + &ctx->unit_buffer_size)) { + return false; + } + break; + } +#endif + default: + // TODO(tomfinegan): Abuse FILE_TYPE_RAW for AV1/OBU elementary streams? 
+ fprintf(stderr, "Error: Unsupported file type.\n"); + return false; + } + + return true; +} + +} // namespace + +int main(int argc, const char *argv[]) { + // TODO(tomfinegan): Could do with some params for verbosity. + if (argc < 2) { + PrintUsage(); + return EXIT_SUCCESS; + } + + const std::string filename = argv[1]; + + using FilePtr = std::unique_ptr; + FilePtr input_file(fopen(filename.c_str(), "rb"), &fclose); + if (input_file.get() == nullptr) { + input_file.release(); + fprintf(stderr, "Error: Cannot open input file.\n"); + return EXIT_FAILURE; + } + + AvxInputContext avx_ctx; + InputContext input_ctx; + input_ctx.avx_ctx = &avx_ctx; + ObuDecInputContext obu_ctx; + input_ctx.obu_ctx = &obu_ctx; +#if CONFIG_WEBM_IO + WebmInputContext webm_ctx; + input_ctx.webm_ctx = &webm_ctx; +#endif + + input_ctx.Init(); + avx_ctx.file = input_file.get(); + avx_ctx.file_type = GetFileType(&input_ctx); + + // Note: the reader utilities will realloc the buffer using realloc() etc. + // Can't have nice things like unique_ptr wrappers with that type of + // behavior underneath the function calls. 
+ input_ctx.unit_buffer = + reinterpret_cast(calloc(kInitialBufferSize, 1)); + if (!input_ctx.unit_buffer) { + fprintf(stderr, "Error: No memory, can't alloc input buffer.\n"); + return EXIT_FAILURE; + } + input_ctx.unit_buffer_size = kInitialBufferSize; + + size_t unit_size = 0; + int unit_number = 0; + int64_t obu_overhead_bytes_total = 0; + while (ReadTemporalUnit(&input_ctx, &unit_size)) { + printf("Temporal unit %d\n", unit_number); + + int obu_overhead_current_unit = 0; + if (!aom_tools::DumpObu(input_ctx.unit_buffer, static_cast(unit_size), + &obu_overhead_current_unit)) { + fprintf(stderr, "Error: Temporal Unit parse failed on unit number %d.\n", + unit_number); + return EXIT_FAILURE; + } + printf(" OBU overhead: %d\n", obu_overhead_current_unit); + ++unit_number; + obu_overhead_bytes_total += obu_overhead_current_unit; + } + + printf("File total OBU overhead: %" PRId64 "\n", obu_overhead_bytes_total); + return EXIT_SUCCESS; +} diff --git a/third_party/aom/tools/ftfy.sh b/third_party/aom/tools/ftfy.sh deleted file mode 100755 index 315da1af5..000000000 --- a/third_party/aom/tools/ftfy.sh +++ /dev/null @@ -1,158 +0,0 @@ -#!/bin/sh -self="$0" -dirname_self=$(dirname "$self") - -usage() { - cat <&2 -Usage: $self [option] - -This script applies a whitespace transformation to the commit at HEAD. If no -options are given, then the modified files are left in the working tree. - -Options: - -h, --help Shows this message - -n, --dry-run Shows a diff of the changes to be made. - --amend Squashes the changes into the commit at HEAD - This option will also reformat the commit message. - --commit Creates a new commit containing only the whitespace changes - --msg-only Reformat the commit message only, ignore the patch itself. 
- -EOF - rm -f ${CLEAN_FILES} - exit 1 -} - - -log() { - echo "${self##*/}: $@" >&2 -} - - -aom_style() { - for f; do - case "$f" in - *.h|*.c|*.cc) - clang-format -i --style=file "$f" - ;; - esac - done -} - - -apply() { - [ $INTERSECT_RESULT -ne 0 ] && patch -p1 < "$1" -} - - -commit() { - LAST_CHANGEID=$(git show | awk '/Change-Id:/{print $2}') - if [ -z "$LAST_CHANGEID" ]; then - log "HEAD doesn't have a Change-Id, unable to generate a new commit" - exit 1 - fi - - # Build a deterministic Change-Id from the parent's - NEW_CHANGEID=${LAST_CHANGEID}-styled - NEW_CHANGEID=I$(echo $NEW_CHANGEID | git hash-object --stdin) - - # Commit, preserving authorship from the parent commit. - git commit -a -C HEAD > /dev/null - git commit --amend -F- << EOF -Cosmetic: Fix whitespace in change ${LAST_CHANGEID:0:9} - -Change-Id: ${NEW_CHANGEID} -EOF -} - - -show_commit_msg_diff() { - if [ $DIFF_MSG_RESULT -ne 0 ]; then - log "Modified commit message:" - diff -u "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG" | tail -n +3 - fi -} - - -amend() { - show_commit_msg_diff - if [ $DIFF_MSG_RESULT -ne 0 ] || [ $INTERSECT_RESULT -ne 0 ]; then - git commit -a --amend -F "$NEW_COMMIT_MSG" - fi -} - - -diff_msg() { - git log -1 --format=%B > "$ORIG_COMMIT_MSG" - "${dirname_self}"/wrap-commit-msg.py \ - < "$ORIG_COMMIT_MSG" > "$NEW_COMMIT_MSG" - cmp -s "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG" - DIFF_MSG_RESULT=$? -} - - -# Temporary files -ORIG_DIFF=orig.diff.$$ -MODIFIED_DIFF=modified.diff.$$ -FINAL_DIFF=final.diff.$$ -ORIG_COMMIT_MSG=orig.commit-msg.$$ -NEW_COMMIT_MSG=new.commit-msg.$$ -CLEAN_FILES="${ORIG_DIFF} ${MODIFIED_DIFF} ${FINAL_DIFF}" -CLEAN_FILES="${CLEAN_FILES} ${ORIG_COMMIT_MSG} ${NEW_COMMIT_MSG}" - -# Preconditions -[ $# -lt 2 ] || usage - -if ! clang-format -version >/dev/null 2>&1; then - log "clang-format not found" - exit 1 -fi - -if ! 
git diff --quiet HEAD; then - log "Working tree is dirty, commit your changes first" - exit 1 -fi - -# Need to be in the root -cd "$(git rev-parse --show-toplevel)" - -# Collect the original diff -git show > "${ORIG_DIFF}" - -# Apply the style guide on new and modified files and collect its diff -for f in $(git diff HEAD^ --name-only -M90 --diff-filter=AM); do - case "$f" in - third_party/*) continue;; - esac - aom_style "$f" -done -git diff --no-color --no-ext-diff > "${MODIFIED_DIFF}" - -# Intersect the two diffs -"${dirname_self}"/intersect-diffs.py \ - "${ORIG_DIFF}" "${MODIFIED_DIFF}" > "${FINAL_DIFF}" -INTERSECT_RESULT=$? -git reset --hard >/dev/null - -# Fixup the commit message -diff_msg - -# Handle options -if [ -n "$1" ]; then - case "$1" in - -h|--help) usage;; - -n|--dry-run) cat "${FINAL_DIFF}"; show_commit_msg_diff;; - --commit) apply "${FINAL_DIFF}"; commit;; - --amend) apply "${FINAL_DIFF}"; amend;; - --msg-only) amend;; - *) usage;; - esac -else - apply "${FINAL_DIFF}" - if ! git diff --quiet; then - log "Formatting changes applied, verify and commit." - log "See also: http://www.webmproject.org/code/contribute/conventions/" - git diff --stat - fi -fi - -rm -f ${CLEAN_FILES} diff --git a/third_party/aom/tools/obu_parser.cc b/third_party/aom/tools/obu_parser.cc new file mode 100644 index 000000000..2d0f5b27c --- /dev/null +++ b/third_party/aom/tools/obu_parser.cc @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include +#include + +#include "aom/aom_codec.h" +#include "aom/aom_integer.h" +#include "aom_ports/mem_ops.h" +#include "av1/decoder/obu.h" +#include "tools/obu_parser.h" + +namespace aom_tools { + +// Basic OBU syntax +// 8 bits: Header +// 7 +// forbidden bit +// 6,5,4,3 +// type bits +// 2 +// extension flag bit +// 1 +// has size field bit +// 0 +// reserved bit +const uint32_t kObuForbiddenBitMask = 0x1; +const uint32_t kObuForbiddenBitShift = 7; +const uint32_t kObuTypeBitsMask = 0xF; +const uint32_t kObuTypeBitsShift = 3; +const uint32_t kObuExtensionFlagBitMask = 0x1; +const uint32_t kObuExtensionFlagBitShift = 2; +const uint32_t kObuHasSizeFieldBitMask = 0x1; +const uint32_t kObuHasSizeFieldBitShift = 1; + +// When extension flag bit is set: +// 8 bits: extension header +// 7,6,5 +// temporal ID +// 4,3 +// spatial ID +// 2,1,0 +// reserved bits +const uint32_t kObuExtTemporalIdBitsMask = 0x7; +const uint32_t kObuExtTemporalIdBitsShift = 5; +const uint32_t kObuExtSpatialIdBitsMask = 0x3; +const uint32_t kObuExtSpatialIdBitsShift = 3; + +bool ValidObuType(int obu_type) { + switch (obu_type) { + case OBU_SEQUENCE_HEADER: + case OBU_TEMPORAL_DELIMITER: + case OBU_FRAME_HEADER: + case OBU_TILE_GROUP: + case OBU_METADATA: + case OBU_FRAME: + case OBU_REDUNDANT_FRAME_HEADER: + case OBU_TILE_LIST: + case OBU_PADDING: return true; + } + return false; +} + +bool ParseObuHeader(uint8_t obu_header_byte, ObuHeader *obu_header) { + const int forbidden_bit = + (obu_header_byte >> kObuForbiddenBitShift) & kObuForbiddenBitMask; + if (forbidden_bit) { + fprintf(stderr, "Invalid OBU, forbidden bit set.\n"); + return false; + } + + obu_header->type = static_cast( + (obu_header_byte >> kObuTypeBitsShift) & kObuTypeBitsMask); + if (!ValidObuType(obu_header->type)) { + fprintf(stderr, "Invalid OBU type: %d.\n", obu_header->type); + return false; + } + + obu_header->has_extension = + (obu_header_byte >> kObuExtensionFlagBitShift) & kObuExtensionFlagBitMask; + 
+  obu_header->has_size_field =
+      (obu_header_byte >> kObuHasSizeFieldBitShift) & kObuHasSizeFieldBitMask;
+  return true;
+}
+
+// Decodes the OBU extension header byte, storing the temporal layer ID and
+// the spatial layer ID in obu_header. Always returns true.
+bool ParseObuExtensionHeader(uint8_t ext_header_byte, ObuHeader *obu_header) {
+  obu_header->temporal_layer_id =
+      (ext_header_byte >> kObuExtTemporalIdBitsShift) &
+      kObuExtTemporalIdBitsMask;
+  obu_header->spatial_layer_id =
+      (ext_header_byte >> kObuExtSpatialIdBitsShift) & kObuExtSpatialIdBitsMask;
+
+  return true;
+}
+
+// Prints the OBU type and the extension flag to stdout, followed by the
+// temporal and spatial layer IDs when the extension header is present.
+void PrintObuHeader(const ObuHeader *header) {
+  // NOTE(review): the cast's template argument was missing from this text;
+  // OBU_TYPE is assumed from the OBU_* enumerators above -- confirm against
+  // the declaration of aom_obu_type_to_string().
+  printf(
+      " OBU type: %s\n"
+      " extension: %s\n",
+      aom_obu_type_to_string(static_cast<OBU_TYPE>(header->type)),
+      header->has_extension ? "yes" : "no");
+  if (header->has_extension) {
+    // spatial_id must report spatial_layer_id (it previously repeated the
+    // temporal layer ID).
+    printf(
+        " temporal_id: %d\n"
+        " spatial_id: %d\n",
+        header->temporal_layer_id, header->spatial_layer_id);
+  }
+}
+
+// Parses consecutive OBUs from data[0, length). For each OBU it prints the
+// header and the total length (header + size field + payload); after the last
+// one it prints the number of bytes consumed. Returns false, after reporting
+// to stderr, on the first malformed OBU. On success the number of header and
+// extension-header bytes is stored in *obu_overhead_bytes when that pointer is
+// non-null.
+bool DumpObu(const uint8_t *data, int length, int *obu_overhead_bytes) {
+  const int kObuHeaderSizeBytes = 1;
+  const int kMinimumBytesRequired = 1 + kObuHeaderSizeBytes;
+  int consumed = 0;
+  int obu_overhead = 0;
+  ObuHeader obu_header;
+  while (consumed < length) {
+    const int remaining = length - consumed;
+    if (remaining < kMinimumBytesRequired) {
+      fprintf(stderr,
+              "OBU parse error. Did not consume all data, %d bytes remain.\n",
+              remaining);
+      return false;
+    }
+
+    int obu_header_size = 0;
+
+    memset(&obu_header, 0, sizeof(obu_header));
+    const uint8_t obu_header_byte = *(data + consumed);
+    if (!ParseObuHeader(obu_header_byte, &obu_header)) {
+      fprintf(stderr, "OBU parsing failed at offset %d.\n", consumed);
+      return false;
+    }
+
+    ++obu_overhead;
+    ++obu_header_size;
+
+    if (obu_header.has_extension) {
+      // The minimum-size check above guarantees this second byte is readable.
+      const uint8_t obu_ext_header_byte =
+          *(data + consumed + kObuHeaderSizeBytes);
+      if (!ParseObuExtensionHeader(obu_ext_header_byte, &obu_header)) {
+        fprintf(stderr, "OBU extension parsing failed at offset %d.\n",
+                consumed + kObuHeaderSizeBytes);
+        return false;
+      }
+
+      ++obu_overhead;
+      ++obu_header_size;
+    }
+
+    PrintObuHeader(&obu_header);
+
+    uint64_t obu_size = 0;
+    size_t length_field_size = 0;
+    if (aom_uleb_decode(data + consumed + obu_header_size,
+                        remaining - obu_header_size, &obu_size,
+                        &length_field_size) != 0) {
+      fprintf(stderr, "OBU size parsing failed at offset %d.\n",
+              consumed + obu_header_size);
+      return false;
+    }
+
+    // Bounds-check in 64 bits before narrowing to int: a decoded size above
+    // INT_MAX could otherwise turn negative, slip past the check and move
+    // `consumed` backwards.
+    const uint64_t available = static_cast<uint64_t>(remaining);
+    const uint64_t prefix_size =
+        static_cast<uint64_t>(obu_header_size) + length_field_size;
+    if (prefix_size > available || obu_size > available - prefix_size) {
+      fprintf(stderr, "OBU parsing failed: not enough OBU data.\n");
+      return false;
+    }
+
+    // Safe to narrow: prefix_size + obu_size <= remaining, which is an int.
+    const int obu_total_length =
+        static_cast<int>(prefix_size) + static_cast<int>(obu_size);
+    consumed += obu_total_length;
+    printf(" length: %d\n", obu_total_length);
+  }
+
+  if (obu_overhead_bytes != nullptr) *obu_overhead_bytes = obu_overhead;
+  printf(" TU size: %d\n", consumed);
+
+  return true;
+}
+
+}  // namespace aom_tools
diff --git a/third_party/aom/tools/obu_parser.h b/third_party/aom/tools/obu_parser.h
new file mode 100644
index 000000000..86e7c4581
--- /dev/null
+++ b/third_party/aom/tools/obu_parser.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media.
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#ifndef TOOLS_OBU_PARSER_H_ +#define TOOLS_OBU_PARSER_H_ + +#include + +namespace aom_tools { + +// Print information obtained from OBU(s) in data until data is exhausted or an +// error occurs. Returns true when all data is consumed successfully, and +// optionally reports OBU storage overhead via obu_overhead_bytes when the +// pointer is non-null. +bool DumpObu(const uint8_t *data, int length, int *obu_overhead_bytes); + +} // namespace aom_tools + +#endif // TOOLS_OBU_PARSER_H_ diff --git a/third_party/aom/tools/txfm_analyzer/txfm_gen_code.cc b/third_party/aom/tools/txfm_analyzer/txfm_gen_code.cc new file mode 100644 index 000000000..7c5400b91 --- /dev/null +++ b/third_party/aom/tools/txfm_analyzer/txfm_gen_code.cc @@ -0,0 +1,580 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include +#include +#include +#include +#include + +#include "tools/txfm_analyzer/txfm_graph.h" + +typedef enum CODE_TYPE { + CODE_TYPE_C, + CODE_TYPE_SSE2, + CODE_TYPE_SSE4_1 +} CODE_TYPE; + +int get_cos_idx(double value, int mod) { + return round(acos(fabs(value)) / PI * mod); +} + +char *cos_text_arr(double value, int mod, char *text, int size) { + int num = get_cos_idx(value, mod); + if (value < 0) { + snprintf(text, size, "-cospi[%2d]", num); + } else { + snprintf(text, size, " cospi[%2d]", num); + } + + if (num == 0) + printf("v: %f -> %d/%d v==-1 is %d\n", value, num, mod, value == -1); + + return text; +} + +char *cos_text_sse2(double w0, double w1, int mod, char *text, int size) { + int idx0 = get_cos_idx(w0, mod); + int idx1 = get_cos_idx(w1, mod); + char p[] = "p"; + char n[] = "m"; + char *sgn0 = w0 < 0 ? n : p; + char *sgn1 = w1 < 0 ? n : p; + snprintf(text, size, "cospi_%s%02d_%s%02d", sgn0, idx0, sgn1, idx1); + return text; +} + +char *cos_text_sse4_1(double w, int mod, char *text, int size) { + int idx = get_cos_idx(w, mod); + char p[] = "p"; + char n[] = "m"; + char *sgn = w < 0 ? 
n : p; + snprintf(text, size, "cospi_%s%02d", sgn, idx); + return text; +} + +void node_to_code_c(Node *node, const char *buf0, const char *buf1) { + int cnt = 0; + for (int i = 0; i < 2; i++) { + if (fabs(node->inWeight[i]) == 1 || fabs(node->inWeight[i]) == 0) cnt++; + } + if (cnt == 2) { + int cnt2 = 0; + printf(" %s[%d] =", buf1, node->nodeIdx); + for (int i = 0; i < 2; i++) { + if (fabs(node->inWeight[i]) == 1) { + cnt2++; + } + } + if (cnt2 == 2) { + printf(" apply_value("); + } + int cnt1 = 0; + for (int i = 0; i < 2; i++) { + if (node->inWeight[i] == 1) { + if (cnt1 > 0) + printf(" + %s[%d]", buf0, node->inNodeIdx[i]); + else + printf(" %s[%d]", buf0, node->inNodeIdx[i]); + cnt1++; + } else if (node->inWeight[i] == -1) { + if (cnt1 > 0) + printf(" - %s[%d]", buf0, node->inNodeIdx[i]); + else + printf("-%s[%d]", buf0, node->inNodeIdx[i]); + cnt1++; + } + } + if (cnt2 == 2) { + printf(", stage_range[stage])"); + } + printf(";\n"); + } else { + char w0[100]; + char w1[100]; + printf( + " %s[%d] = half_btf(%s, %s[%d], %s, %s[%d], " + "cos_bit);\n", + buf1, node->nodeIdx, cos_text_arr(node->inWeight[0], COS_MOD, w0, 100), + buf0, node->inNodeIdx[0], + cos_text_arr(node->inWeight[1], COS_MOD, w1, 100), buf0, + node->inNodeIdx[1]); + } +} + +void gen_code_c(Node *node, int stage_num, int node_num, TYPE_TXFM type) { + char *fun_name = new char[100]; + get_fun_name(fun_name, 100, type, node_num); + + printf("\n"); + printf( + "void av1_%s(const int32_t *input, int32_t *output, int8_t cos_bit, " + "const int8_t* stage_range) " + "{\n", + fun_name); + printf(" assert(output != input);\n"); + printf(" const int32_t size = %d;\n", node_num); + printf(" const int32_t *cospi = cospi_arr(cos_bit);\n"); + printf("\n"); + + printf(" int32_t stage = 0;\n"); + printf(" int32_t *bf0, *bf1;\n"); + printf(" int32_t step[%d];\n", node_num); + + const char *buf0 = "bf0"; + const char *buf1 = "bf1"; + const char *input = "input"; + + int si = 0; + printf("\n"); + printf(" // stage 
%d;\n", si); + printf(" apply_range(stage, input, %s, size, stage_range[stage]);\n", input); + + si = 1; + printf("\n"); + printf(" // stage %d;\n", si); + printf(" stage++;\n"); + if (si % 2 == (stage_num - 1) % 2) { + printf(" %s = output;\n", buf1); + } else { + printf(" %s = step;\n", buf1); + } + + for (int ni = 0; ni < node_num; ni++) { + int idx = get_idx(si, ni, node_num); + node_to_code_c(node + idx, input, buf1); + } + + printf(" range_check_buf(stage, input, bf1, size, stage_range[stage]);\n"); + + for (int si = 2; si < stage_num; si++) { + printf("\n"); + printf(" // stage %d\n", si); + printf(" stage++;\n"); + if (si % 2 == (stage_num - 1) % 2) { + printf(" %s = step;\n", buf0); + printf(" %s = output;\n", buf1); + } else { + printf(" %s = output;\n", buf0); + printf(" %s = step;\n", buf1); + } + + // computation code + for (int ni = 0; ni < node_num; ni++) { + int idx = get_idx(si, ni, node_num); + node_to_code_c(node + idx, buf0, buf1); + } + + if (si != stage_num - 1) { + printf( + " range_check_buf(stage, input, bf1, size, stage_range[stage]);\n"); + } + } + printf(" apply_range(stage, input, output, size, stage_range[stage]);\n"); + printf("}\n"); +} + +void single_node_to_code_sse2(Node *node, const char *buf0, const char *buf1) { + printf(" %s[%2d] =", buf1, node->nodeIdx); + if (node->inWeight[0] == 1 && node->inWeight[1] == 1) { + printf(" _mm_adds_epi16(%s[%d], %s[%d])", buf0, node->inNodeIdx[0], buf0, + node->inNodeIdx[1]); + } else if (node->inWeight[0] == 1 && node->inWeight[1] == -1) { + printf(" _mm_subs_epi16(%s[%d], %s[%d])", buf0, node->inNodeIdx[0], buf0, + node->inNodeIdx[1]); + } else if (node->inWeight[0] == -1 && node->inWeight[1] == 1) { + printf(" _mm_subs_epi16(%s[%d], %s[%d])", buf0, node->inNodeIdx[1], buf0, + node->inNodeIdx[0]); + } else if (node->inWeight[0] == 1 && node->inWeight[1] == 0) { + printf(" %s[%d]", buf0, node->inNodeIdx[0]); + } else if (node->inWeight[0] == 0 && node->inWeight[1] == 1) { + printf(" %s[%d]", 
buf0, node->inNodeIdx[1]); + } else if (node->inWeight[0] == -1 && node->inWeight[1] == 0) { + printf(" _mm_subs_epi16(__zero, %s[%d])", buf0, node->inNodeIdx[0]); + } else if (node->inWeight[0] == 0 && node->inWeight[1] == -1) { + printf(" _mm_subs_epi16(__zero, %s[%d])", buf0, node->inNodeIdx[1]); + } + printf(";\n"); +} + +void pair_node_to_code_sse2(Node *node, Node *partnerNode, const char *buf0, + const char *buf1) { + char temp0[100]; + char temp1[100]; + // btf_16_sse2_type0(w0, w1, in0, in1, out0, out1) + if (node->inNodeIdx[0] != partnerNode->inNodeIdx[0]) + printf(" btf_16_sse2(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d]);\n", + cos_text_sse2(node->inWeight[0], node->inWeight[1], COS_MOD, temp0, + 100), + cos_text_sse2(partnerNode->inWeight[1], partnerNode->inWeight[0], + COS_MOD, temp1, 100), + buf0, node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1, + node->nodeIdx, buf1, partnerNode->nodeIdx); + else + printf(" btf_16_sse2(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d]);\n", + cos_text_sse2(node->inWeight[0], node->inWeight[1], COS_MOD, temp0, + 100), + cos_text_sse2(partnerNode->inWeight[0], partnerNode->inWeight[1], + COS_MOD, temp1, 100), + buf0, node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1, + node->nodeIdx, buf1, partnerNode->nodeIdx); +} + +Node *get_partner_node(Node *node) { + int diff = node->inNode[1]->nodeIdx - node->nodeIdx; + return node + diff; +} + +void node_to_code_sse2(Node *node, const char *buf0, const char *buf1) { + int cnt = 0; + int cnt1 = 0; + if (node->visited == 0) { + node->visited = 1; + for (int i = 0; i < 2; i++) { + if (fabs(node->inWeight[i]) == 1 || fabs(node->inWeight[i]) == 0) cnt++; + if (fabs(node->inWeight[i]) == 1) cnt1++; + } + if (cnt == 2) { + if (cnt1 == 2) { + // has a partner + Node *partnerNode = get_partner_node(node); + partnerNode->visited = 1; + single_node_to_code_sse2(node, buf0, buf1); + single_node_to_code_sse2(partnerNode, buf0, buf1); + } else { + single_node_to_code_sse2(node, buf0, buf1); + } + } 
else { + Node *partnerNode = get_partner_node(node); + partnerNode->visited = 1; + pair_node_to_code_sse2(node, partnerNode, buf0, buf1); + } + } +} + +void gen_cospi_list_sse2(Node *node, int stage_num, int node_num) { + int visited[65][65][2][2]; + memset(visited, 0, sizeof(visited)); + char text[100]; + char text1[100]; + char text2[100]; + int size = 100; + printf("\n"); + for (int si = 1; si < stage_num; si++) { + for (int ni = 0; ni < node_num; ni++) { + int idx = get_idx(si, ni, node_num); + int cnt = 0; + Node *node0 = node + idx; + if (node0->visited == 0) { + node0->visited = 1; + for (int i = 0; i < 2; i++) { + if (fabs(node0->inWeight[i]) == 1 || fabs(node0->inWeight[i]) == 0) + cnt++; + } + if (cnt != 2) { + { + double w0 = node0->inWeight[0]; + double w1 = node0->inWeight[1]; + int idx0 = get_cos_idx(w0, COS_MOD); + int idx1 = get_cos_idx(w1, COS_MOD); + int sgn0 = w0 < 0 ? 1 : 0; + int sgn1 = w1 < 0 ? 1 : 0; + + if (!visited[idx0][idx1][sgn0][sgn1]) { + visited[idx0][idx1][sgn0][sgn1] = 1; + printf(" __m128i %s = pair_set_epi16(%s, %s);\n", + cos_text_sse2(w0, w1, COS_MOD, text, size), + cos_text_arr(w0, COS_MOD, text1, size), + cos_text_arr(w1, COS_MOD, text2, size)); + } + } + Node *node1 = get_partner_node(node0); + node1->visited = 1; + if (node1->inNode[0]->nodeIdx != node0->inNode[0]->nodeIdx) { + double w0 = node1->inWeight[0]; + double w1 = node1->inWeight[1]; + int idx0 = get_cos_idx(w0, COS_MOD); + int idx1 = get_cos_idx(w1, COS_MOD); + int sgn0 = w0 < 0 ? 1 : 0; + int sgn1 = w1 < 0 ? 1 : 0; + + if (!visited[idx1][idx0][sgn1][sgn0]) { + visited[idx1][idx0][sgn1][sgn0] = 1; + printf(" __m128i %s = pair_set_epi16(%s, %s);\n", + cos_text_sse2(w1, w0, COS_MOD, text, size), + cos_text_arr(w1, COS_MOD, text1, size), + cos_text_arr(w0, COS_MOD, text2, size)); + } + } else { + double w0 = node1->inWeight[0]; + double w1 = node1->inWeight[1]; + int idx0 = get_cos_idx(w0, COS_MOD); + int idx1 = get_cos_idx(w1, COS_MOD); + int sgn0 = w0 < 0 ? 
1 : 0; + int sgn1 = w1 < 0 ? 1 : 0; + + if (!visited[idx0][idx1][sgn0][sgn1]) { + visited[idx0][idx1][sgn0][sgn1] = 1; + printf(" __m128i %s = pair_set_epi16(%s, %s);\n", + cos_text_sse2(w0, w1, COS_MOD, text, size), + cos_text_arr(w0, COS_MOD, text1, size), + cos_text_arr(w1, COS_MOD, text2, size)); + } + } + } + } + } + } +} + +void gen_code_sse2(Node *node, int stage_num, int node_num, TYPE_TXFM type) { + char *fun_name = new char[100]; + get_fun_name(fun_name, 100, type, node_num); + + printf("\n"); + printf( + "void %s_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) " + "{\n", + fun_name); + + printf(" const int32_t* cospi = cospi_arr(cos_bit);\n"); + printf(" const __m128i __zero = _mm_setzero_si128();\n"); + printf(" const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));\n"); + + graph_reset_visited(node, stage_num, node_num); + gen_cospi_list_sse2(node, stage_num, node_num); + graph_reset_visited(node, stage_num, node_num); + for (int si = 1; si < stage_num; si++) { + char in[100]; + char out[100]; + printf("\n"); + printf(" // stage %d\n", si); + if (si == 1) + snprintf(in, 100, "%s", "input"); + else + snprintf(in, 100, "x%d", si - 1); + if (si == stage_num - 1) { + snprintf(out, 100, "%s", "output"); + } else { + snprintf(out, 100, "x%d", si); + printf(" __m128i %s[%d];\n", out, node_num); + } + // computation code + for (int ni = 0; ni < node_num; ni++) { + int idx = get_idx(si, ni, node_num); + node_to_code_sse2(node + idx, in, out); + } + } + + printf("}\n"); +} +void gen_cospi_list_sse4_1(Node *node, int stage_num, int node_num) { + int visited[65][2]; + memset(visited, 0, sizeof(visited)); + char text[100]; + char text1[100]; + int size = 100; + printf("\n"); + for (int si = 1; si < stage_num; si++) { + for (int ni = 0; ni < node_num; ni++) { + int idx = get_idx(si, ni, node_num); + Node *node0 = node + idx; + if (node0->visited == 0) { + int cnt = 0; + node0->visited = 1; + for (int i = 0; i < 2; i++) { + if 
(fabs(node0->inWeight[i]) == 1 || fabs(node0->inWeight[i]) == 0) + cnt++; + } + if (cnt != 2) { + for (int i = 0; i < 2; i++) { + if (fabs(node0->inWeight[i]) != 1 && + fabs(node0->inWeight[i]) != 0) { + double w = node0->inWeight[i]; + int idx = get_cos_idx(w, COS_MOD); + int sgn = w < 0 ? 1 : 0; + + if (!visited[idx][sgn]) { + visited[idx][sgn] = 1; + printf(" __m128i %s = _mm_set1_epi32(%s);\n", + cos_text_sse4_1(w, COS_MOD, text, size), + cos_text_arr(w, COS_MOD, text1, size)); + } + } + } + Node *node1 = get_partner_node(node0); + node1->visited = 1; + } + } + } + } +} + +void single_node_to_code_sse4_1(Node *node, const char *buf0, + const char *buf1) { + printf(" %s[%2d] =", buf1, node->nodeIdx); + if (node->inWeight[0] == 1 && node->inWeight[1] == 1) { + printf(" _mm_add_epi32(%s[%d], %s[%d])", buf0, node->inNodeIdx[0], buf0, + node->inNodeIdx[1]); + } else if (node->inWeight[0] == 1 && node->inWeight[1] == -1) { + printf(" _mm_sub_epi32(%s[%d], %s[%d])", buf0, node->inNodeIdx[0], buf0, + node->inNodeIdx[1]); + } else if (node->inWeight[0] == -1 && node->inWeight[1] == 1) { + printf(" _mm_sub_epi32(%s[%d], %s[%d])", buf0, node->inNodeIdx[1], buf0, + node->inNodeIdx[0]); + } else if (node->inWeight[0] == 1 && node->inWeight[1] == 0) { + printf(" %s[%d]", buf0, node->inNodeIdx[0]); + } else if (node->inWeight[0] == 0 && node->inWeight[1] == 1) { + printf(" %s[%d]", buf0, node->inNodeIdx[1]); + } else if (node->inWeight[0] == -1 && node->inWeight[1] == 0) { + printf(" _mm_sub_epi32(__zero, %s[%d])", buf0, node->inNodeIdx[0]); + } else if (node->inWeight[0] == 0 && node->inWeight[1] == -1) { + printf(" _mm_sub_epi32(__zero, %s[%d])", buf0, node->inNodeIdx[1]); + } + printf(";\n"); +} + +void pair_node_to_code_sse4_1(Node *node, Node *partnerNode, const char *buf0, + const char *buf1) { + char temp0[100]; + char temp1[100]; + if (node->inWeight[0] * partnerNode->inWeight[0] < 0) { + /* type0 + * cos sin + * sin -cos + */ + // btf_32_sse2_type0(w0, w1, in0, in1, 
out0, out1)
+    // out0 = w0*in0 + w1*in1
+    // out1 = -w0*in1 + w1*in0
+    printf(
+        " btf_32_type0_sse4_1_new(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d], "
+        "__rounding, cos_bit);\n",
+        cos_text_sse4_1(node->inWeight[0], COS_MOD, temp0, 100),
+        cos_text_sse4_1(node->inWeight[1], COS_MOD, temp1, 100), buf0,
+        node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1, node->nodeIdx, buf1,
+        partnerNode->nodeIdx);
+  } else {
+    /* type1
+     * cos sin
+     * -sin cos
+     */
+    // btf_32_sse2_type1(w0, w1, in0, in1, out0, out1)
+    // out0 = w0*in0 + w1*in1
+    // out1 = w0*in1 - w1*in0
+    printf(
+        " btf_32_type1_sse4_1_new(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d], "
+        "__rounding, cos_bit);\n",
+        cos_text_sse4_1(node->inWeight[0], COS_MOD, temp0, 100),
+        cos_text_sse4_1(node->inWeight[1], COS_MOD, temp1, 100), buf0,
+        node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1, node->nodeIdx, buf1,
+        partnerNode->nodeIdx);
+  }
+}
+
+// Emits the SSE4.1 statement(s) for one graph node, reading from array buf0
+// and writing to array buf1. Nodes already marked visited are skipped. Nodes
+// whose weights are all 0 or +/-1 become add/sub/copy statements; any other
+// node is emitted together with its partner as one butterfly call, and the
+// partner is marked visited so it is not emitted twice.
+void node_to_code_sse4_1(Node *node, const char *buf0, const char *buf1) {
+  int cnt = 0;   // inputs whose weight is 0 or +/-1
+  int cnt1 = 0;  // inputs whose weight is +/-1
+  if (node->visited == 0) {
+    node->visited = 1;
+    for (int i = 0; i < 2; i++) {
+      if (fabs(node->inWeight[i]) == 1 || fabs(node->inWeight[i]) == 0) cnt++;
+      if (fabs(node->inWeight[i]) == 1) cnt1++;
+    }
+    if (cnt == 2) {
+      if (cnt1 == 2) {
+        // has a partner
+        Node *partnerNode = get_partner_node(node);
+        partnerNode->visited = 1;
+        single_node_to_code_sse4_1(node, buf0, buf1);
+        single_node_to_code_sse4_1(partnerNode, buf0, buf1);
+      } else {
+        // Use the 32-bit emitter here as well: the SSE2 one would print
+        // _mm_subs_epi16 for a negated input.
+        single_node_to_code_sse4_1(node, buf0, buf1);
+      }
+    } else {
+      Node *partnerNode = get_partner_node(node);
+      partnerNode->visited = 1;
+      pair_node_to_code_sse4_1(node, partnerNode, buf0, buf1);
+    }
+  }
+}
+
+void gen_code_sse4_1(Node *node, int stage_num, int node_num, TYPE_TXFM type) {
+  char *fun_name = new char[100];
+  get_fun_name(fun_name, 100, type, node_num);
+
+  printf("\n");
+  printf(
+      "void %s_sse4_1(const __m128i *input, __m128i *output, int8_t cos_bit) "
+      "{\n",
+      fun_name);
+
+  printf(" const int32_t* cospi = 
cospi_arr(cos_bit);\n"); + printf(" const __m128i __zero = _mm_setzero_si128();\n"); + printf(" const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));\n"); + + graph_reset_visited(node, stage_num, node_num); + gen_cospi_list_sse4_1(node, stage_num, node_num); + graph_reset_visited(node, stage_num, node_num); + for (int si = 1; si < stage_num; si++) { + char in[100]; + char out[100]; + printf("\n"); + printf(" // stage %d\n", si); + if (si == 1) + snprintf(in, 100, "%s", "input"); + else + snprintf(in, 100, "x%d", si - 1); + if (si == stage_num - 1) { + snprintf(out, 100, "%s", "output"); + } else { + snprintf(out, 100, "x%d", si); + printf(" __m128i %s[%d];\n", out, node_num); + } + // computation code + for (int ni = 0; ni < node_num; ni++) { + int idx = get_idx(si, ni, node_num); + node_to_code_sse4_1(node + idx, in, out); + } + } + + printf("}\n"); +} + +void gen_hybrid_code(CODE_TYPE code_type, TYPE_TXFM txfm_type, int node_num) { + int stage_num = get_hybrid_stage_num(txfm_type, node_num); + + Node *node = new Node[node_num * stage_num]; + init_graph(node, stage_num, node_num); + + gen_hybrid_graph_1d(node, stage_num, node_num, 0, 0, node_num, txfm_type); + + switch (code_type) { + case CODE_TYPE_C: gen_code_c(node, stage_num, node_num, txfm_type); break; + case CODE_TYPE_SSE2: + gen_code_sse2(node, stage_num, node_num, txfm_type); + break; + case CODE_TYPE_SSE4_1: + gen_code_sse4_1(node, stage_num, node_num, txfm_type); + break; + } + + delete[] node; +} + +int main(int argc, char **argv) { + CODE_TYPE code_type = CODE_TYPE_SSE4_1; + for (int txfm_type = TYPE_DCT; txfm_type < TYPE_LAST; txfm_type++) { + for (int node_num = 4; node_num <= 64; node_num *= 2) { + gen_hybrid_code(code_type, (TYPE_TXFM)txfm_type, node_num); + } + } + return 0; +} diff --git a/third_party/aom/tools/txfm_analyzer/txfm_graph.cc b/third_party/aom/tools/txfm_analyzer/txfm_graph.cc new file mode 100644 index 000000000..a24906100 --- /dev/null +++ 
b/third_party/aom/tools/txfm_analyzer/txfm_graph.cc @@ -0,0 +1,943 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "tools/txfm_analyzer/txfm_graph.h" + +#include +#include +#include + +typedef struct Node Node; + +void get_fun_name(char *str_fun_name, int str_buf_size, const TYPE_TXFM type, + const int txfm_size) { + if (type == TYPE_DCT) + snprintf(str_fun_name, str_buf_size, "fdct%d_new", txfm_size); + else if (type == TYPE_ADST) + snprintf(str_fun_name, str_buf_size, "fadst%d_new", txfm_size); + else if (type == TYPE_IDCT) + snprintf(str_fun_name, str_buf_size, "idct%d_new", txfm_size); + else if (type == TYPE_IADST) + snprintf(str_fun_name, str_buf_size, "iadst%d_new", txfm_size); +} + +void get_txfm_type_name(char *str_fun_name, int str_buf_size, + const TYPE_TXFM type, const int txfm_size) { + if (type == TYPE_DCT) + snprintf(str_fun_name, str_buf_size, "TXFM_TYPE_DCT%d", txfm_size); + else if (type == TYPE_ADST) + snprintf(str_fun_name, str_buf_size, "TXFM_TYPE_ADST%d", txfm_size); + else if (type == TYPE_IDCT) + snprintf(str_fun_name, str_buf_size, "TXFM_TYPE_DCT%d", txfm_size); + else if (type == TYPE_IADST) + snprintf(str_fun_name, str_buf_size, "TXFM_TYPE_ADST%d", txfm_size); +} + +void get_hybrid_2d_type_name(char *buf, int buf_size, const TYPE_TXFM type0, + const TYPE_TXFM type1, const int txfm_size0, + const int txfm_size1) { + if (type0 == TYPE_DCT && type1 == TYPE_DCT) + snprintf(buf, buf_size, "_dct_dct_%dx%d", txfm_size1, txfm_size0); + else if (type0 
== TYPE_DCT && type1 == TYPE_ADST) + snprintf(buf, buf_size, "_dct_adst_%dx%d", txfm_size1, txfm_size0); + else if (type0 == TYPE_ADST && type1 == TYPE_ADST) + snprintf(buf, buf_size, "_adst_adst_%dx%d", txfm_size1, txfm_size0); + else if (type0 == TYPE_ADST && type1 == TYPE_DCT) + snprintf(buf, buf_size, "_adst_dct_%dx%d", txfm_size1, txfm_size0); +} + +TYPE_TXFM get_inv_type(TYPE_TXFM type) { + if (type == TYPE_DCT) + return TYPE_IDCT; + else if (type == TYPE_ADST) + return TYPE_IADST; + else if (type == TYPE_IDCT) + return TYPE_DCT; + else if (type == TYPE_IADST) + return TYPE_ADST; + else + return TYPE_LAST; +} + +void reference_dct_1d(double *in, double *out, int size) { + const double kInvSqrt2 = 0.707106781186547524400844362104; + for (int k = 0; k < size; k++) { + out[k] = 0; // initialize out[k] + for (int n = 0; n < size; n++) { + out[k] += in[n] * cos(PI * (2 * n + 1) * k / (2 * size)); + } + if (k == 0) out[k] = out[k] * kInvSqrt2; + } +} + +void reference_dct_2d(double *in, double *out, int size) { + double *tempOut = new double[size * size]; + // dct each row: in -> out + for (int r = 0; r < size; r++) { + reference_dct_1d(in + r * size, out + r * size, size); + } + + for (int r = 0; r < size; r++) { + // out ->tempOut + for (int c = 0; c < size; c++) { + tempOut[r * size + c] = out[c * size + r]; + } + } + for (int r = 0; r < size; r++) { + reference_dct_1d(tempOut + r * size, out + r * size, size); + } + delete[] tempOut; +} + +void reference_adst_1d(double *in, double *out, int size) { + for (int k = 0; k < size; k++) { + out[k] = 0; // initialize out[k] + for (int n = 0; n < size; n++) { + out[k] += in[n] * sin(PI * (2 * n + 1) * (2 * k + 1) / (4 * size)); + } + } +} + +void reference_hybrid_2d(double *in, double *out, int size, int type0, + int type1) { + double *tempOut = new double[size * size]; + // dct each row: in -> out + for (int r = 0; r < size; r++) { + if (type0 == TYPE_DCT) + reference_dct_1d(in + r * size, out + r * size, size); + 
else + reference_adst_1d(in + r * size, out + r * size, size); + } + + for (int r = 0; r < size; r++) { + // out ->tempOut + for (int c = 0; c < size; c++) { + tempOut[r * size + c] = out[c * size + r]; + } + } + for (int r = 0; r < size; r++) { + if (type1 == TYPE_DCT) + reference_dct_1d(tempOut + r * size, out + r * size, size); + else + reference_adst_1d(tempOut + r * size, out + r * size, size); + } + delete[] tempOut; +} + +void reference_hybrid_2d_new(double *in, double *out, int size0, int size1, + int type0, int type1) { + double *tempOut = new double[size0 * size1]; + // dct each row: in -> out + for (int r = 0; r < size1; r++) { + if (type0 == TYPE_DCT) + reference_dct_1d(in + r * size0, out + r * size0, size0); + else + reference_adst_1d(in + r * size0, out + r * size0, size0); + } + + for (int r = 0; r < size1; r++) { + // out ->tempOut + for (int c = 0; c < size0; c++) { + tempOut[c * size1 + r] = out[r * size0 + c]; + } + } + for (int r = 0; r < size0; r++) { + if (type1 == TYPE_DCT) + reference_dct_1d(tempOut + r * size1, out + r * size1, size1); + else + reference_adst_1d(tempOut + r * size1, out + r * size1, size1); + } + delete[] tempOut; +} + +unsigned int get_max_bit(unsigned int x) { + int max_bit = -1; + while (x) { + x = x >> 1; + max_bit++; + } + return max_bit; +} + +unsigned int bitwise_reverse(unsigned int x, int max_bit) { + x = ((x >> 16) & 0x0000ffff) | ((x & 0x0000ffff) << 16); + x = ((x >> 8) & 0x00ff00ff) | ((x & 0x00ff00ff) << 8); + x = ((x >> 4) & 0x0f0f0f0f) | ((x & 0x0f0f0f0f) << 4); + x = ((x >> 2) & 0x33333333) | ((x & 0x33333333) << 2); + x = ((x >> 1) & 0x55555555) | ((x & 0x55555555) << 1); + x = x >> (31 - max_bit); + return x; +} + +int get_idx(int ri, int ci, int cSize) { return ri * cSize + ci; } + +void add_node(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int in, double w) { + int outIdx = get_idx(stage_idx, node_idx, node_num); + int inIdx = get_idx(stage_idx - 1, in, node_num); + int idx = 
node[outIdx].inNodeNum;
+  if (idx < 2) {
+    node[outIdx].inNode[idx] = &node[inIdx];
+    node[outIdx].inNodeIdx[idx] = in;
+    node[outIdx].inWeight[idx] = w;
+    idx++;
+    node[outIdx].inNodeNum = idx;
+  } else {
+    printf("Error: inNode is full");
+  }
+}
+
+// Sets both inputs of node (stage_idx, node_idx): input 0 is node in0 of the
+// previous stage with weight w0, input 1 is node in1 of the previous stage
+// with weight w1. inNodeNum is always set to 2; zero weights are stored too,
+// since the skip-on-zero guards are commented out.
+void connect_node(Node *node, int stage_num, int node_num, int stage_idx,
+                  int node_idx, int in0, double w0, int in1, double w1) {
+  int outIdx = get_idx(stage_idx, node_idx, node_num);
+  int inIdx0 = get_idx(stage_idx - 1, in0, node_num);
+  int inIdx1 = get_idx(stage_idx - 1, in1, node_num);
+
+  int idx = 0;
+  // if(w0 != 0) {
+  node[outIdx].inNode[idx] = &node[inIdx0];
+  node[outIdx].inNodeIdx[idx] = in0;
+  node[outIdx].inWeight[idx] = w0;
+  idx++;
+  //}
+
+  // if(w1 != 0) {
+  node[outIdx].inNode[idx] = &node[inIdx1];
+  node[outIdx].inNodeIdx[idx] = in1;
+  node[outIdx].inWeight[idx] = w1;
+  idx++;
+  //}
+
+  node[outIdx].inNodeNum = idx;
+}
+
+// Recomputes the value of every node in stage stage_idx as the weighted sum
+// of the values of its input nodes.
+void propagate(Node *node, int stage_num, int node_num, int stage_idx) {
+  for (int ni = 0; ni < node_num; ni++) {
+    int outIdx = get_idx(stage_idx, ni, node_num);
+    node[outIdx].value = 0;
+    for (int k = 0; k < node[outIdx].inNodeNum; k++) {
+      node[outIdx].value +=
+          node[outIdx].inNode[k]->value * node[outIdx].inWeight[k];
+    }
+  }
+}
+
+// For bit > 0, returns value / 2^bit rounded to nearest, with halves rounded
+// away from zero (negative values are rounded by magnitude). For bit <= 0,
+// returns value scaled up by 2^(-bit).
+int64_t round_shift(int64_t value, int bit) {
+  if (bit > 0) {
+    if (value < 0) {
+      return -round_shift(-value, bit);
+    } else {
+      // Build the rounding term in 64 bits: a plain `1 << (bit - 1)` is an
+      // int shift and overflows for bit >= 32.
+      return (value + (static_cast<int64_t>(1) << (bit - 1))) >> bit;
+    }
+  } else {
+    // Multiply instead of shifting: left-shifting a negative value is
+    // undefined before C++20.
+    return value * (static_cast<int64_t>(1) << (-bit));
+  }
+}
+
+// Applies round_shift() with the given bit to each of the size elements of
+// arr in place. Does nothing when bit is 0.
+void round_shift_array(int32_t *arr, int size, int bit) {
+  if (bit == 0) {
+    return;
+  } else {
+    for (int i = 0; i < size; i++) {
+      arr[i] = round_shift(arr[i], bit);
+    }
+  }
+}
+
+// Clears the visited flag of every node in the stage_num x node_num graph.
+void graph_reset_visited(Node *node, int stage_num, int node_num) {
+  for (int si = 0; si < stage_num; si++) {
+    for (int ni = 0; ni < node_num; ni++) {
+      int idx = get_idx(si, ni, node_num);
+      node[idx].visited = 0;
+    }
+  }
+}
+
+void estimate_value(Node *node, int stage_num, int node_num, int stage_idx,
+                    int node_idx, int estimate_bit) {
if (stage_idx > 0) { + int outIdx = get_idx(stage_idx, node_idx, node_num); + int64_t out = 0; + node[outIdx].value = 0; + for (int k = 0; k < node[outIdx].inNodeNum; k++) { + int64_t w = round(node[outIdx].inWeight[k] * (1 << estimate_bit)); + int64_t v = round(node[outIdx].inNode[k]->value); + out += v * w; + } + node[outIdx].value = round_shift(out, estimate_bit); + } +} + +void amplify_value(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int amplify_bit) { + int outIdx = get_idx(stage_idx, node_idx, node_num); + node[outIdx].value = round_shift(round(node[outIdx].value), -amplify_bit); +} + +void propagate_estimate_amlify(Node *node, int stage_num, int node_num, + int stage_idx, int amplify_bit, + int estimate_bit) { + for (int ni = 0; ni < node_num; ni++) { + estimate_value(node, stage_num, node_num, stage_idx, ni, estimate_bit); + amplify_value(node, stage_num, node_num, stage_idx, ni, amplify_bit); + } +} + +void init_graph(Node *node, int stage_num, int node_num) { + for (int si = 0; si < stage_num; si++) { + for (int ni = 0; ni < node_num; ni++) { + int outIdx = get_idx(si, ni, node_num); + node[outIdx].stageIdx = si; + node[outIdx].nodeIdx = ni; + node[outIdx].value = 0; + node[outIdx].inNodeNum = 0; + if (si >= 1) { + connect_node(node, stage_num, node_num, si, ni, ni, 1, ni, 0); + } + } + } +} + +void gen_B_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N, int star) { + for (int i = 0; i < N / 2; i++) { + int out = node_idx + i; + int in1 = node_idx + N - 1 - i; + if (star == 1) { + connect_node(node, stage_num, node_num, stage_idx + 1, out, out, -1, in1, + 1); + } else { + connect_node(node, stage_num, node_num, stage_idx + 1, out, out, 1, in1, + 1); + } + } + for (int i = N / 2; i < N; i++) { + int out = node_idx + i; + int in1 = node_idx + N - 1 - i; + if (star == 1) { + connect_node(node, stage_num, node_num, stage_idx + 1, out, out, 1, in1, + 1); + } else { + connect_node(node, stage_num, 
node_num, stage_idx + 1, out, out, -1, in1, + 1); + } + } +} + +void gen_P_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N) { + int max_bit = get_max_bit(N - 1); + for (int i = 0; i < N; i++) { + int out = node_idx + bitwise_reverse(i, max_bit); + int in = node_idx + i; + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0); + } +} + +void gen_type1_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N) { + int max_bit = get_max_bit(N); + for (int ni = 0; ni < N / 2; ni++) { + int ai = bitwise_reverse(N + ni, max_bit); + int out = node_idx + ni; + int in1 = node_idx + N - ni - 1; + connect_node(node, stage_num, node_num, stage_idx + 1, out, out, + sin(PI * ai / (2 * 2 * N)), in1, cos(PI * ai / (2 * 2 * N))); + } + for (int ni = N / 2; ni < N; ni++) { + int ai = bitwise_reverse(N + ni, max_bit); + int out = node_idx + ni; + int in1 = node_idx + N - ni - 1; + connect_node(node, stage_num, node_num, stage_idx + 1, out, out, + cos(PI * ai / (2 * 2 * N)), in1, -sin(PI * ai / (2 * 2 * N))); + } +} + +void gen_type2_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N) { + for (int ni = 0; ni < N / 4; ni++) { + int out = node_idx + ni; + connect_node(node, stage_num, node_num, stage_idx + 1, out, out, 1, out, 0); + } + + for (int ni = N / 4; ni < N / 2; ni++) { + int out = node_idx + ni; + int in1 = node_idx + N - ni - 1; + connect_node(node, stage_num, node_num, stage_idx + 1, out, out, + -cos(PI / 4), in1, cos(-PI / 4)); + } + + for (int ni = N / 2; ni < N * 3 / 4; ni++) { + int out = node_idx + ni; + int in1 = node_idx + N - ni - 1; + connect_node(node, stage_num, node_num, stage_idx + 1, out, out, + cos(-PI / 4), in1, cos(PI / 4)); + } + + for (int ni = N * 3 / 4; ni < N; ni++) { + int out = node_idx + ni; + connect_node(node, stage_num, node_num, stage_idx + 1, out, out, 1, out, 0); + } +} + +void gen_type3_graph(Node *node, int stage_num, int 
node_num, int stage_idx, + int node_idx, int idx, int N) { + // TODO(angiebird): Simplify and clarify this function + + int i = 2 * N / (1 << (idx / 2)); + int max_bit = + get_max_bit(i / 2) - 1; // the max_bit counts on i/2 instead of N here + int N_over_i = 2 << (idx / 2); + + for (int nj = 0; nj < N / 2; nj += N_over_i) { + int j = nj / (N_over_i); + int kj = bitwise_reverse(i / 4 + j, max_bit); + // printf("kj = %d\n", kj); + + // I_N/2i --- 0 + int offset = nj; + for (int ni = 0; ni < N_over_i / 4; ni++) { + int out = node_idx + offset + ni; + int in = out; + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0); + } + + // -C_Kj/i --- S_Kj/i + offset += N_over_i / 4; + for (int ni = 0; ni < N_over_i / 4; ni++) { + int out = node_idx + offset + ni; + int in0 = out; + double w0 = -cos(kj * PI / i); + int in1 = N - (offset + ni) - 1 + node_idx; + double w1 = sin(kj * PI / i); + connect_node(node, stage_num, node_num, stage_idx + 1, out, in0, w0, in1, + w1); + } + + // S_kj/i --- -C_Kj/i + offset += N_over_i / 4; + for (int ni = 0; ni < N_over_i / 4; ni++) { + int out = node_idx + offset + ni; + int in0 = out; + double w0 = -sin(kj * PI / i); + int in1 = N - (offset + ni) - 1 + node_idx; + double w1 = -cos(kj * PI / i); + connect_node(node, stage_num, node_num, stage_idx + 1, out, in0, w0, in1, + w1); + } + + // I_N/2i --- 0 + offset += N_over_i / 4; + for (int ni = 0; ni < N_over_i / 4; ni++) { + int out = node_idx + offset + ni; + int in = out; + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0); + } + } + + for (int nj = N / 2; nj < N; nj += N_over_i) { + int j = nj / N_over_i; + int kj = bitwise_reverse(i / 4 + j, max_bit); + + // I_N/2i --- 0 + int offset = nj; + for (int ni = 0; ni < N_over_i / 4; ni++) { + int out = node_idx + offset + ni; + int in = out; + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0); + } + + // C_kj/i --- -S_Kj/i + offset += N_over_i / 4; + for (int ni = 0; ni < 
N_over_i / 4; ni++) { + int out = node_idx + offset + ni; + int in0 = out; + double w0 = cos(kj * PI / i); + int in1 = N - (offset + ni) - 1 + node_idx; + double w1 = -sin(kj * PI / i); + connect_node(node, stage_num, node_num, stage_idx + 1, out, in0, w0, in1, + w1); + } + + // S_kj/i --- C_Kj/i + offset += N_over_i / 4; + for (int ni = 0; ni < N_over_i / 4; ni++) { + int out = node_idx + offset + ni; + int in0 = out; + double w0 = sin(kj * PI / i); + int in1 = N - (offset + ni) - 1 + node_idx; + double w1 = cos(kj * PI / i); + connect_node(node, stage_num, node_num, stage_idx + 1, out, in0, w0, in1, + w1); + } + + // I_N/2i --- 0 + offset += N_over_i / 4; + for (int ni = 0; ni < N_over_i / 4; ni++) { + int out = node_idx + offset + ni; + int in = out; + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0); + } + } +} + +void gen_type4_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int idx, int N) { + int B_size = 1 << ((idx + 1) / 2); + for (int ni = 0; ni < N; ni += B_size) { + gen_B_graph(node, stage_num, node_num, stage_idx, node_idx + ni, B_size, + (ni / B_size) % 2); + } +} + +void gen_R_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N) { + int max_idx = 2 * (get_max_bit(N) + 1) - 3; + for (int idx = 0; idx < max_idx; idx++) { + int s = stage_idx + max_idx - idx - 1; + if (idx == 0) { + // type 1 + gen_type1_graph(node, stage_num, node_num, s, node_idx, N); + } else if (idx == max_idx - 1) { + // type 2 + gen_type2_graph(node, stage_num, node_num, s, node_idx, N); + } else if ((idx + 1) % 2 == 0) { + // type 4 + gen_type4_graph(node, stage_num, node_num, s, node_idx, idx, N); + } else if ((idx + 1) % 2 == 1) { + // type 3 + gen_type3_graph(node, stage_num, node_num, s, node_idx, idx, N); + } else { + printf("check gen_R_graph()\n"); + } + } +} + +void gen_DCT_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N) { + if (N > 2) { + 
gen_B_graph(node, stage_num, node_num, stage_idx, node_idx, N, 0); + gen_DCT_graph(node, stage_num, node_num, stage_idx + 1, node_idx, N / 2); + gen_R_graph(node, stage_num, node_num, stage_idx + 1, node_idx + N / 2, + N / 2); + } else { + // generate dct_2 + connect_node(node, stage_num, node_num, stage_idx + 1, node_idx, node_idx, + cos(PI / 4), node_idx + 1, cos(PI / 4)); + connect_node(node, stage_num, node_num, stage_idx + 1, node_idx + 1, + node_idx + 1, -cos(PI / 4), node_idx, cos(PI / 4)); + } +} + +int get_dct_stage_num(int size) { return 2 * get_max_bit(size); } + +void gen_DCT_graph_1d(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int dct_node_num) { + gen_DCT_graph(node, stage_num, node_num, stage_idx, node_idx, dct_node_num); + int dct_stage_num = get_dct_stage_num(dct_node_num); + gen_P_graph(node, stage_num, node_num, stage_idx + dct_stage_num - 2, + node_idx, dct_node_num); +} + +void gen_adst_B_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_idx) { + int size = 1 << (adst_idx + 1); + for (int ni = 0; ni < size / 2; ni++) { + int nOut = node_idx + ni; + int nIn = nOut + size / 2; + connect_node(node, stage_num, node_num, stage_idx + 1, nOut, nOut, 1, nIn, + 1); + // printf("nOut: %d nIn: %d\n", nOut, nIn); + } + for (int ni = size / 2; ni < size; ni++) { + int nOut = node_idx + ni; + int nIn = nOut - size / 2; + connect_node(node, stage_num, node_num, stage_idx + 1, nOut, nOut, -1, nIn, + 1); + // printf("ndctOut: %d nIn: %d\n", nOut, nIn); + } +} + +void gen_adst_U_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_idx, int adst_node_num) { + int size = 1 << (adst_idx + 1); + for (int ni = 0; ni < adst_node_num; ni += size) { + gen_adst_B_graph(node, stage_num, node_num, stage_idx, node_idx + ni, + adst_idx); + } +} + +void gen_adst_T_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, double freq) { + connect_node(node, 
stage_num, node_num, stage_idx + 1, node_idx, node_idx, + cos(freq * PI), node_idx + 1, sin(freq * PI)); + connect_node(node, stage_num, node_num, stage_idx + 1, node_idx + 1, + node_idx + 1, -cos(freq * PI), node_idx, sin(freq * PI)); +} + +void gen_adst_E_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_idx) { + int size = 1 << (adst_idx); + for (int i = 0; i < size / 2; i++) { + int ni = i * 2; + double fi = (1 + 4 * i) * 1.0 / (1 << (adst_idx + 1)); + gen_adst_T_graph(node, stage_num, node_num, stage_idx, node_idx + ni, fi); + } +} + +void gen_adst_V_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_idx, int adst_node_num) { + int size = 1 << (adst_idx); + for (int i = 0; i < adst_node_num / size; i++) { + if (i % 2 == 1) { + int ni = i * size; + gen_adst_E_graph(node, stage_num, node_num, stage_idx, node_idx + ni, + adst_idx); + } + } +} +void gen_adst_VJ_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num) { + for (int i = 0; i < adst_node_num / 2; i++) { + int ni = i * 2; + double fi = (1 + 4 * i) * 1.0 / (4 * adst_node_num); + gen_adst_T_graph(node, stage_num, node_num, stage_idx, node_idx + ni, fi); + } +} +void gen_adst_Q_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num) { + // reverse order when idx is 1, 3, 5, 7 ... 
+ // example of adst_node_num = 8: + // 0 1 2 3 4 5 6 7 + // --> 0 7 2 5 4 3 6 1 + for (int ni = 0; ni < adst_node_num; ni++) { + if (ni % 2 == 0) { + int out = node_idx + ni; + connect_node(node, stage_num, node_num, stage_idx + 1, out, out, 1, out, + 0); + } else { + int out = node_idx + ni; + int in = node_idx + adst_node_num - ni; + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0); + } + } +} +void gen_adst_Ibar_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num) { + // reverse order + // 0 1 2 3 --> 3 2 1 0 + for (int ni = 0; ni < adst_node_num; ni++) { + int out = node_idx + ni; + int in = node_idx + adst_node_num - ni - 1; + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0); + } +} + +int get_Q_out2in(int adst_node_num, int out) { + int in; + if (out % 2 == 0) { + in = out; + } else { + in = adst_node_num - out; + } + return in; +} + +int get_Ibar_out2in(int adst_node_num, int out) { + return adst_node_num - out - 1; +} + +void gen_adst_IbarQ_graph(Node *node, int stage_num, int node_num, + int stage_idx, int node_idx, int adst_node_num) { + // in -> Ibar -> Q -> out + for (int ni = 0; ni < adst_node_num; ni++) { + int out = node_idx + ni; + int in = node_idx + + get_Ibar_out2in(adst_node_num, get_Q_out2in(adst_node_num, ni)); + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0); + } +} + +void gen_adst_D_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num) { + // reverse order + for (int ni = 0; ni < adst_node_num; ni++) { + int out = node_idx + ni; + int in = out; + if (ni % 2 == 0) { + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0); + } else { + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, -1, in, + 0); + } + } +} + +int get_hadamard_idx(int x, int adst_node_num) { + int max_bit = get_max_bit(adst_node_num - 1); + x = bitwise_reverse(x, max_bit); + + 
// gray code + int c = x & 1; + int p = x & 1; + int y = c; + + for (int i = 1; i <= max_bit; i++) { + p = c; + c = (x >> i) & 1; + y += (c ^ p) << i; + } + return y; +} + +void gen_adst_Ht_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num) { + for (int ni = 0; ni < adst_node_num; ni++) { + int out = node_idx + ni; + int in = node_idx + get_hadamard_idx(ni, adst_node_num); + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0); + } +} + +void gen_adst_HtD_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num) { + for (int ni = 0; ni < adst_node_num; ni++) { + int out = node_idx + ni; + int in = node_idx + get_hadamard_idx(ni, adst_node_num); + double inW; + if (ni % 2 == 0) + inW = 1; + else + inW = -1; + connect_node(node, stage_num, node_num, stage_idx + 1, out, in, inW, in, 0); + } +} + +int get_adst_stage_num(int adst_node_num) { + return 2 * get_max_bit(adst_node_num) + 2; +} + +int gen_iadst_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num) { + int max_bit = get_max_bit(adst_node_num); + int si = 0; + gen_adst_IbarQ_graph(node, stage_num, node_num, stage_idx + si, node_idx, + adst_node_num); + si++; + gen_adst_VJ_graph(node, stage_num, node_num, stage_idx + si, node_idx, + adst_node_num); + si++; + for (int adst_idx = max_bit - 1; adst_idx >= 1; adst_idx--) { + gen_adst_U_graph(node, stage_num, node_num, stage_idx + si, node_idx, + adst_idx, adst_node_num); + si++; + gen_adst_V_graph(node, stage_num, node_num, stage_idx + si, node_idx, + adst_idx, adst_node_num); + si++; + } + gen_adst_HtD_graph(node, stage_num, node_num, stage_idx + si, node_idx, + adst_node_num); + si++; + return si + 1; +} + +int gen_adst_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num) { + int hybrid_stage_num = get_hybrid_stage_num(TYPE_ADST, adst_node_num); + // generate a adst tempNode 
+ Node *tempNode = new Node[hybrid_stage_num * adst_node_num]; + init_graph(tempNode, hybrid_stage_num, adst_node_num); + int si = gen_iadst_graph(tempNode, hybrid_stage_num, adst_node_num, 0, 0, + adst_node_num); + + // tempNode's inverse graph to node[stage_idx][node_idx] + gen_inv_graph(tempNode, hybrid_stage_num, adst_node_num, node, stage_num, + node_num, stage_idx, node_idx); + delete[] tempNode; + return si; +} + +void connect_layer_2d(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int dct_node_num) { + for (int first = 0; first < dct_node_num; first++) { + for (int second = 0; second < dct_node_num; second++) { + // int sIn = stage_idx; + int sOut = stage_idx + 1; + int nIn = node_idx + first * dct_node_num + second; + int nOut = node_idx + second * dct_node_num + first; + + // printf("sIn: %d nIn: %d sOut: %d nOut: %d\n", sIn, nIn, sOut, nOut); + + connect_node(node, stage_num, node_num, sOut, nOut, nIn, 1, nIn, 0); + } + } +} + +void connect_layer_2d_new(Node *node, int stage_num, int node_num, + int stage_idx, int node_idx, int dct_node_num0, + int dct_node_num1) { + for (int i = 0; i < dct_node_num1; i++) { + for (int j = 0; j < dct_node_num0; j++) { + // int sIn = stage_idx; + int sOut = stage_idx + 1; + int nIn = node_idx + i * dct_node_num0 + j; + int nOut = node_idx + j * dct_node_num1 + i; + + // printf("sIn: %d nIn: %d sOut: %d nOut: %d\n", sIn, nIn, sOut, nOut); + + connect_node(node, stage_num, node_num, sOut, nOut, nIn, 1, nIn, 0); + } + } +} + +void gen_DCT_graph_2d(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int dct_node_num) { + int dct_stage_num = get_dct_stage_num(dct_node_num); + // put 2 layers of dct_node_num DCTs on the graph + for (int ni = 0; ni < dct_node_num; ni++) { + gen_DCT_graph_1d(node, stage_num, node_num, stage_idx, + node_idx + ni * dct_node_num, dct_node_num); + gen_DCT_graph_1d(node, stage_num, node_num, stage_idx + dct_stage_num, + node_idx + ni * dct_node_num, 
dct_node_num); + } + // connect first layer and second layer + connect_layer_2d(node, stage_num, node_num, stage_idx + dct_stage_num - 1, + node_idx, dct_node_num); +} + +int get_hybrid_stage_num(int type, int hybrid_node_num) { + if (type == TYPE_DCT || type == TYPE_IDCT) { + return get_dct_stage_num(hybrid_node_num); + } else if (type == TYPE_ADST || type == TYPE_IADST) { + return get_adst_stage_num(hybrid_node_num); + } + return 0; +} + +int get_hybrid_2d_stage_num(int type0, int type1, int hybrid_node_num) { + int stage_num = 0; + stage_num += get_hybrid_stage_num(type0, hybrid_node_num); + stage_num += get_hybrid_stage_num(type1, hybrid_node_num); + return stage_num; +} + +int get_hybrid_2d_stage_num_new(int type0, int type1, int hybrid_node_num0, + int hybrid_node_num1) { + int stage_num = 0; + stage_num += get_hybrid_stage_num(type0, hybrid_node_num0); + stage_num += get_hybrid_stage_num(type1, hybrid_node_num1); + return stage_num; +} + +int get_hybrid_amplify_factor(int type, int hybrid_node_num) { + return get_max_bit(hybrid_node_num) - 1; +} + +void gen_hybrid_graph_1d(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int hybrid_node_num, int type) { + if (type == TYPE_DCT) { + gen_DCT_graph_1d(node, stage_num, node_num, stage_idx, node_idx, + hybrid_node_num); + } else if (type == TYPE_ADST) { + gen_adst_graph(node, stage_num, node_num, stage_idx, node_idx, + hybrid_node_num); + } else if (type == TYPE_IDCT) { + int hybrid_stage_num = get_hybrid_stage_num(type, hybrid_node_num); + // generate a dct tempNode + Node *tempNode = new Node[hybrid_stage_num * hybrid_node_num]; + init_graph(tempNode, hybrid_stage_num, hybrid_node_num); + gen_DCT_graph_1d(tempNode, hybrid_stage_num, hybrid_node_num, 0, 0, + hybrid_node_num); + + // tempNode's inverse graph to node[stage_idx][node_idx] + gen_inv_graph(tempNode, hybrid_stage_num, hybrid_node_num, node, stage_num, + node_num, stage_idx, node_idx); + delete[] tempNode; + } else if (type == 
TYPE_IADST) { + int hybrid_stage_num = get_hybrid_stage_num(type, hybrid_node_num); + // generate a adst tempNode + Node *tempNode = new Node[hybrid_stage_num * hybrid_node_num]; + init_graph(tempNode, hybrid_stage_num, hybrid_node_num); + gen_adst_graph(tempNode, hybrid_stage_num, hybrid_node_num, 0, 0, + hybrid_node_num); + + // tempNode's inverse graph to node[stage_idx][node_idx] + gen_inv_graph(tempNode, hybrid_stage_num, hybrid_node_num, node, stage_num, + node_num, stage_idx, node_idx); + delete[] tempNode; + } +} + +void gen_hybrid_graph_2d(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int hybrid_node_num, int type0, + int type1) { + int hybrid_stage_num = get_hybrid_stage_num(type0, hybrid_node_num); + + for (int ni = 0; ni < hybrid_node_num; ni++) { + gen_hybrid_graph_1d(node, stage_num, node_num, stage_idx, + node_idx + ni * hybrid_node_num, hybrid_node_num, + type0); + gen_hybrid_graph_1d(node, stage_num, node_num, stage_idx + hybrid_stage_num, + node_idx + ni * hybrid_node_num, hybrid_node_num, + type1); + } + + // connect first layer and second layer + connect_layer_2d(node, stage_num, node_num, stage_idx + hybrid_stage_num - 1, + node_idx, hybrid_node_num); +} + +void gen_hybrid_graph_2d_new(Node *node, int stage_num, int node_num, + int stage_idx, int node_idx, int hybrid_node_num0, + int hybrid_node_num1, int type0, int type1) { + int hybrid_stage_num0 = get_hybrid_stage_num(type0, hybrid_node_num0); + + for (int ni = 0; ni < hybrid_node_num1; ni++) { + gen_hybrid_graph_1d(node, stage_num, node_num, stage_idx, + node_idx + ni * hybrid_node_num0, hybrid_node_num0, + type0); + } + for (int ni = 0; ni < hybrid_node_num0; ni++) { + gen_hybrid_graph_1d( + node, stage_num, node_num, stage_idx + hybrid_stage_num0, + node_idx + ni * hybrid_node_num1, hybrid_node_num1, type1); + } + + // connect first layer and second layer + connect_layer_2d_new(node, stage_num, node_num, + stage_idx + hybrid_stage_num0 - 1, node_idx, + 
hybrid_node_num0, hybrid_node_num1); +} + +void gen_inv_graph(Node *node, int stage_num, int node_num, Node *invNode, + int inv_stage_num, int inv_node_num, int inv_stage_idx, + int inv_node_idx) { + // clean up inNodeNum in invNode because of add_node + for (int si = 1 + inv_stage_idx; si < inv_stage_idx + stage_num; si++) { + for (int ni = inv_node_idx; ni < inv_node_idx + node_num; ni++) { + int idx = get_idx(si, ni, inv_node_num); + invNode[idx].inNodeNum = 0; + } + } + // generate inverse graph of node on invNode + for (int si = 1; si < stage_num; si++) { + for (int ni = 0; ni < node_num; ni++) { + int invSi = stage_num - si; + int idx = get_idx(si, ni, node_num); + for (int k = 0; k < node[idx].inNodeNum; k++) { + int invNi = node[idx].inNodeIdx[k]; + add_node(invNode, inv_stage_num, inv_node_num, invSi + inv_stage_idx, + invNi + inv_node_idx, ni + inv_node_idx, + node[idx].inWeight[k]); + } + } + } +} diff --git a/third_party/aom/tools/txfm_analyzer/txfm_graph.h b/third_party/aom/tools/txfm_analyzer/txfm_graph.h new file mode 100644 index 000000000..76a9bc732 --- /dev/null +++ b/third_party/aom/tools/txfm_analyzer/txfm_graph.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#ifndef TOOLS_TXFM_ANALYZER_H_ +#define TOOLS_TXFM_ANALYZER_H_ + +struct Node { + Node *inNode[2]; + int inNodeNum; + int inNodeIdx[2]; + double inWeight[2]; + double value; + int nodeIdx; + int stageIdx; + int visited; +}; + +#define PI (3.141592653589793238462643383279502884) +#define STAGENUM (10) +#define NODENUM (32) +#define COS_MOD (128) + +typedef enum { + TYPE_DCT = 0, + TYPE_ADST, + TYPE_IDCT, + TYPE_IADST, + TYPE_LAST +} TYPE_TXFM; + +TYPE_TXFM get_inv_type(TYPE_TXFM type); +void get_fun_name(char *str_fun_name, int str_buf_size, const TYPE_TXFM type, + const int txfm_size); + +void get_txfm_type_name(char *str_fun_name, int str_buf_size, + const TYPE_TXFM type, const int txfm_size); +void get_hybrid_2d_type_name(char *buf, int buf_size, const TYPE_TXFM type0, + const TYPE_TXFM type1, const int txfm_size0, + const int txfm_size1); +unsigned int get_max_bit(unsigned int x); +unsigned int bitwise_reverse(unsigned int x, int max_bit); +int get_idx(int ri, int ci, int cSize); + +int get_dct_stage_num(int size); +void reference_dct_1d(double *in, double *out, int size); +void reference_dct_2d(double *in, double *out, int size); +void connect_node(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int in0, double w0, int in1, double w1); +void propagate(Node *node, int stage_num, int node_num, int stage); +void init_graph(Node *node, int stage_num, int node_num); +void graph_reset_visited(Node *node, int stage_num, int node_num); +void gen_B_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N, int star); +void gen_P_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N); + +void gen_type1_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N); +void gen_type2_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N); +void gen_type3_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int 
idx, int N); +void gen_type4_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int idx, int N); + +void gen_R_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N); + +void gen_DCT_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int N); + +void gen_DCT_graph_1d(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int dct_node_num); +void connect_layer_2d(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int dct_node_num); + +void gen_DCT_graph_2d(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int dct_node_num); + +void gen_adst_B_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_idx); + +void gen_adst_U_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_idx, int adst_node_num); +void gen_adst_T_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, double freq); + +void gen_adst_E_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_idx); + +void gen_adst_V_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_idx, int adst_node_num); + +void gen_adst_VJ_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num); +void gen_adst_Q_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num); +void gen_adst_Ibar_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num); + +void gen_adst_D_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num); + +int get_hadamard_idx(int x, int adst_node_num); +void gen_adst_Ht_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num); + +int gen_adst_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int 
adst_node_num); +int gen_iadst_graph(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int adst_node_num); +void reference_adst_1d(double *in, double *out, int size); + +int get_adst_stage_num(int adst_node_num); +int get_hybrid_stage_num(int type, int hybrid_node_num); +int get_hybrid_2d_stage_num(int type0, int type1, int hybrid_node_num); +int get_hybrid_2d_stage_num_new(int type0, int type1, int hybrid_node_num0, + int hybrid_node_num1); +int get_hybrid_amplify_factor(int type, int hybrid_node_num); +void gen_hybrid_graph_1d(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int hybrid_node_num, int type); +void gen_hybrid_graph_2d(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int hybrid_node_num, int type0, + int type1); +void gen_hybrid_graph_2d_new(Node *node, int stage_num, int node_num, + int stage_idx, int node_idx, int hybrid_node_num0, + int hybrid_node_num1, int type0, int type1); + +void reference_hybrid_2d(double *in, double *out, int size, int type0, + int type1); + +void reference_hybrid_2d_new(double *in, double *out, int size0, int size1, + int type0, int type1); +void reference_adst_dct_2d(double *in, double *out, int size); + +void gen_code(Node *node, int stage_num, int node_num, TYPE_TXFM type); + +void gen_inv_graph(Node *node, int stage_num, int node_num, Node *invNode, + int inv_stage_num, int inv_node_num, int inv_stage_idx, + int inv_node_idx); + +TYPE_TXFM hybrid_char_to_int(char ctype); + +int64_t round_shift(int64_t value, int bit); +void round_shift_array(int32_t *arr, int size, int bit); +void estimate_value(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int estimate_bit); +void amplify_value(Node *node, int stage_num, int node_num, int stage_idx, + int node_idx, int estimate_bit); +void propagate_estimate_amlify(Node *node, int stage_num, int node_num, + int stage_idx, int amplify_bit, + int estimate_bit); +#endif // TOOLS_TXFM_ANALYZER_H_ 
-- cgit v1.2.3