Import aom library

This is the reference implementation for the Alliance for Open Media's av1 video code. The commit used was 4d668d7feb1f8abd809d1bca0418570a7f142a36.
author: trav90 <travawine@palemoon.org> 2018-10-15 21:45:30 -0500
committer: trav90 <travawine@palemoon.org> 2018-10-15 21:45:30 -0500
commit: 68569dee1416593955c1570d638b3d9250b33012 (patch)
tree: d960f017cd7eba3f125b7e8a813789ee2e076310 /third_party/aom/av1/encoder/ratectrl_xiph.c
parent: 07c17b6b98ed32fcecff15c083ab0fd878de3cf0 (diff)
download: UXP-68569dee1416593955c1570d638b3d9250b33012.tar
UXP-68569dee1416593955c1570d638b3d9250b33012.tar.gz
UXP-68569dee1416593955c1570d638b3d9250b33012.tar.lz
UXP-68569dee1416593955c1570d638b3d9250b33012.tar.xz
UXP-68569dee1416593955c1570d638b3d9250b33012.zip
1 files changed, 1244 insertions, 0 deletions
diff --git a/third_party/aom/av1/encoder/ratectrl_xiph.c b/third_party/aom/av1/encoder/ratectrl_xiph.c
new file mode 100644
index 000000000..b9f827528
--- /dev/null
+++ b/third_party/aom/av1/encoder/ratectrl_xiph.c
@@ -0,0 +1,1244 @@
+/*
+ * Copyright (c) 2001-2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "av1/common/odintrin.h"
+#include "av1/encoder/ratectrl_xiph.h"
+
+#define OD_Q57(v) ((int64_t)((uint64_t)(v) << 57))
+#define OD_F_Q45(v) ((int64_t)(((v) * ((int64_t)1 << 45))))
+#define OD_F_Q12(v) ((int32_t)(((v) * ((int32_t)1 << 12))))
+
+/*A rough lookup table for tan(x), 0 <= x < pi/2.
+  The values are Q12 fixed-point and spaced at 5 degree intervals.
+  These decisions are somewhat arbitrary, but sufficient for the 2nd order
+   Bessel follower below.
+  Values of x larger than 85 degrees are extrapolated from the last interval,
+   which is way off, but "good enough".*/
+static uint16_t OD_ROUGH_TAN_LOOKUP[18] = { 0,     358,   722,  1098, 1491,
+                                            1910,  2365,  2868, 3437, 4096,
+                                            4881,  5850,  7094, 8784, 11254,
+                                            15286, 23230, 46817 };
+
+/*alpha is Q24 in the range [0,0.5).
+  The return values is 5.12.*/
+static int od_warp_alpha(int alpha) {
+  int i;
+  int d;
+  int t0;
+  int t1;
+  i = alpha * 36 >> 24;
+  if (i >= 17) i = 16;
+  t0 = OD_ROUGH_TAN_LOOKUP[i];
+  t1 = OD_ROUGH_TAN_LOOKUP[i + 1];
+  d = alpha * 36 - (i << 24);
+  return (int)((((int64_t)t0 << 32) + ((t1 - t0) << 8) * (int64_t)d) >> 32);
+}
+
+static const int64_t OD_ATANH_LOG2[32] = {
+  0x32B803473F7AD0F4LL, 0x2F2A71BD4E25E916LL, 0x2E68B244BB93BA06LL,
+  0x2E39FB9198CE62E4LL, 0x2E2E683F68565C8FLL, 0x2E2B850BE2077FC1LL,
+  0x2E2ACC58FE7B78DBLL, 0x2E2A9E2DE52FD5F2LL, 0x2E2A92A338D53EECLL,
+  0x2E2A8FC08F5E19B6LL, 0x2E2A8F07E51A485ELL, 0x2E2A8ED9BA8AF388LL,
+  0x2E2A8ECE2FE7384ALL, 0x2E2A8ECB4D3E4B1ALL, 0x2E2A8ECA94940FE8LL,
+  0x2E2A8ECA6669811DLL, 0x2E2A8ECA5ADEDD6ALL, 0x2E2A8ECA57FC347ELL,
+  0x2E2A8ECA57438A43LL, 0x2E2A8ECA57155FB4LL, 0x2E2A8ECA5709D510LL,
+  0x2E2A8ECA5706F267LL, 0x2E2A8ECA570639BDLL, 0x2E2A8ECA57060B92LL,
+  0x2E2A8ECA57060008LL, 0x2E2A8ECA5705FD25LL, 0x2E2A8ECA5705FC6CLL,
+  0x2E2A8ECA5705FC3ELL, 0x2E2A8ECA5705FC33LL, 0x2E2A8ECA5705FC30LL,
+  0x2E2A8ECA5705FC2FLL, 0x2E2A8ECA5705FC2FLL
+};
+
+static int od_ilog64(int64_t v) {
+  static const unsigned char OD_DEBRUIJN_IDX64[64] = {
+    0,  1,  2,  7,  3,  13, 8,  19, 4,  25, 14, 28, 9,  34, 20, 40,
+    5,  17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
+    63, 6,  12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56,
+    62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58
+  };
+  int ret;
+  v |= v >> 1;
+  v |= v >> 2;
+  v |= v >> 4;
+  v |= v >> 8;
+  v |= v >> 16;
+  v |= v >> 32;
+  ret = (int)v & 1;
+  v = (v >> 1) + 1;
+  ret += OD_DEBRUIJN_IDX64[v * UINT64_C(0x218A392CD3D5DBF) >> 58 & 0x3F];
+  return ret;
+}
+
+/*Computes the binary exponential of logq57.
+  input: a log base 2 in Q57 format
+  output: a 64 bit integer in Q0 (no fraction) */
+static int64_t od_bexp64(int64_t logq57) {
+  int64_t w;
+  int64_t z;
+  int ipart;
+  ipart = (int)(logq57 >> 57);
+  if (ipart < 0) return 0;
+  if (ipart >= 63) return 0x7FFFFFFFFFFFFFFFLL;
+  z = logq57 - OD_Q57(ipart);
+  if (z) {
+    int64_t mask;
+    int64_t wlo;
+    int i;
+    /*C doesn't give us 64x64->128 muls, so we use CORDIC.
+      This is not particularly fast, but it's not being used in time-critical
+       code; it is very accurate.*/
+    /*z is the fractional part of the log in Q62 format.
+      We need 1 bit of headroom since the magnitude can get larger than 1
+       during the iteration, and a sign bit.*/
+    z <<= 5;
+    /*w is the exponential in Q61 format (since it also needs headroom and can
+       get as large as 2.0); we could get another bit if we dropped the sign,
+       but we'll recover that bit later anyway.
+      Ideally this should start out as
+        \lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}}
+       but in order to guarantee convergence we have to repeat iterations 4,
+        13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.*/
+    w = 0x26A3D0E401DD846DLL;
+    for (i = 0;; i++) {
+      mask = -(z < 0);
+      w += ((w >> (i + 1)) + mask) ^ mask;
+      z -= (OD_ATANH_LOG2[i] + mask) ^ mask;
+      /*Repeat iteration 4.*/
+      if (i >= 3) break;
+      z *= 2;
+    }
+    for (;; i++) {
+      mask = -(z < 0);
+      w += ((w >> (i + 1)) + mask) ^ mask;
+      z -= (OD_ATANH_LOG2[i] + mask) ^ mask;
+      /*Repeat iteration 13.*/
+      if (i >= 12) break;
+      z *= 2;
+    }
+    for (; i < 32; i++) {
+      mask = -(z < 0);
+      w += ((w >> (i + 1)) + mask) ^ mask;
+      z = (z - ((OD_ATANH_LOG2[i] + mask) ^ mask)) * 2;
+    }
+    wlo = 0;
+    /*Skip the remaining iterations unless we really require that much
+       precision.
+      We could have bailed out earlier for smaller iparts, but that would
+       require initializing w from a table, as the limit doesn't converge to
+       61-bit precision until n=30.*/
+    if (ipart > 30) {
+      /*For these iterations, we just update the low bits, as the high bits
+         can't possibly be affected.
+        OD_ATANH_LOG2 has also converged (it actually did so one iteration
+         earlier, but that's no reason for an extra special case).*/
+      for (;; i++) {
+        mask = -(z < 0);
+        wlo += ((w >> i) + mask) ^ mask;
+        z -= (OD_ATANH_LOG2[31] + mask) ^ mask;
+        /*Repeat iteration 40.*/
+        if (i >= 39) break;
+        z <<= 1;
+      }
+      for (; i < 61; i++) {
+        mask = -(z < 0);
+        wlo += ((w >> i) + mask) ^ mask;
+        z = (z - ((OD_ATANH_LOG2[31] + mask) ^ mask)) << 1;
+      }
+    }
+    w = (w << 1) + wlo;
+  } else {
+    w = (int64_t)1 << 62;
+  }
+  if (ipart < 62) {
+    w = ((w >> (61 - ipart)) + 1) >> 1;
+  }
+  return w;
+}
+
+/*Computes the binary log of w
+  input: a 64-bit integer in Q0 (no fraction)
+  output: a 64-bit log in Q57 */
+static int64_t od_blog64(int64_t w) {
+  int64_t z;
+  int ipart;
+  if (w <= 0) return -1;
+  ipart = od_ilog64(w) - 1;
+  if (ipart > 61) {
+    w >>= ipart - 61;
+  } else {
+    w <<= 61 - ipart;
+  }
+  z = 0;
+  if (w & (w - 1)) {
+    int64_t x;
+    int64_t y;
+    int64_t u;
+    int64_t mask;
+    int i;
+    /*C doesn't give us 64x64->128 muls, so we use CORDIC.
+      This is not particularly fast, but it's not being used in time-critical
+       code; it is very accurate.*/
+    /*z is the fractional part of the log in Q61 format.*/
+    /*x and y are the cosh() and sinh(), respectively, in Q61 format.
+      We are computing z = 2*atanh(y/x) = 2*atanh((w - 1)/(w + 1)).*/
+    x = w + ((int64_t)1 << 61);
+    y = w - ((int64_t)1 << 61);
+    for (i = 0; i < 4; i++) {
+      mask = -(y < 0);
+      z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
+      u = x >> (i + 1);
+      x -= ((y >> (i + 1)) + mask) ^ mask;
+      y -= (u + mask) ^ mask;
+    }
+    /*Repeat iteration 4.*/
+    for (i--; i < 13; i++) {
+      mask = -(y < 0);
+      z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
+      u = x >> (i + 1);
+      x -= ((y >> (i + 1)) + mask) ^ mask;
+      y -= (u + mask) ^ mask;
+    }
+    /*Repeat iteration 13.*/
+    for (i--; i < 32; i++) {
+      mask = -(y < 0);
+      z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
+      u = x >> (i + 1);
+      x -= ((y >> (i + 1)) + mask) ^ mask;
+      y -= (u + mask) ^ mask;
+    }
+    /*OD_ATANH_LOG2 has converged.*/
+    for (; i < 40; i++) {
+      mask = -(y < 0);
+      z += ((OD_ATANH_LOG2[31] >> i) + mask) ^ mask;
+      u = x >> (i + 1);
+      x -= ((y >> (i + 1)) + mask) ^ mask;
+      y -= (u + mask) ^ mask;
+    }
+    /*Repeat iteration 40.*/
+    for (i--; i < 62; i++) {
+      mask = -(y < 0);
+      z += ((OD_ATANH_LOG2[31] >> i) + mask) ^ mask;
+      u = x >> (i + 1);
+      x -= ((y >> (i + 1)) + mask) ^ mask;
+      y -= (u + mask) ^ mask;
+    }
+    z = (z + 8) >> 4;
+  }
+  return OD_Q57(ipart) + z;
+}
+
+/*Convenience function converts Q57 value to a clamped 32-bit Q24 value
+  in: input in Q57 format.
+  Return: same number in Q24 */
+static int32_t od_q57_to_q24(int64_t in) {
+  int64_t ret;
+  ret = (in + ((int64_t)1 << 32)) >> 33;
+  /*0x80000000 is automatically converted to unsigned on 32-bit systems.
+    -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to
+    unsigned.*/
+  return (int32_t)OD_CLAMPI(-0x7FFFFFFF - 1, ret, 0x7FFFFFFF);
+}
+
+/*Binary exponential of log_scale with 24-bit fractional precision and
+   saturation.
+  log_scale: A binary logarithm in Q57 format.
+  Return: The binary exponential in Q24 format, saturated to 2**31-1 if
+   log_scale was too large.*/
+static int32_t od_bexp64_q24(int64_t log_scale) {
+  if (log_scale < OD_Q57(8)) {
+    int64_t ret;
+    ret = od_bexp64(log_scale + OD_Q57(24));
+    return ret < 0x7FFFFFFF ? (int32_t)ret : 0x7FFFFFFF;
+  }
+  return 0x7FFFFFFF;
+}
+
+/*Re-initialize Bessel filter coefficients with the specified delay.
+  This does not alter the x/y state, but changes the reaction time of the
+   filter.
+  Altering the time constant of a reactive filter without alterning internal
+   state is something that has to be done carefuly, but our design operates at
+   high enough delays and with small enough time constant changes to make it
+   safe.*/
+static void od_iir_bessel2_reinit(od_iir_bessel2 *f, int delay) {
+  int alpha;
+  int64_t one48;
+  int64_t warp;
+  int64_t k1;
+  int64_t k2;
+  int64_t d;
+  int64_t a;
+  int64_t ik2;
+  int64_t b1;
+  int64_t b2;
+  /*This borrows some code from an unreleased version of Postfish.
+    See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
+     on deriving the filter coefficients.*/
+  /*alpha is Q24*/
+  alpha = (1 << 24) / delay;
+  one48 = (int64_t)1 << 48;
+  /*warp is 7.12*/
+  warp = OD_MAXI(od_warp_alpha(alpha), 1);
+  /*k1 is 9.12*/
+  k1 = 3 * warp;
+  /*k2 is 16.24.*/
+  k2 = k1 * warp;
+  /*d is 16.15.*/
+  d = ((((1 << 12) + k1) << 12) + k2 + 256) >> 9;
+  /*a is 0.32, since d is larger than both 1.0 and k2.*/
+  a = (k2 << 23) / d;
+  /*ik2 is 25.24.*/
+  ik2 = one48 / k2;
+  /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/
+  b1 = 2 * a * (ik2 - (1 << 24));
+  /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/
+  b2 = (one48 << 8) - ((4 * a) << 24) - b1;
+  /*All of the filter parameters are Q24.*/
+  f->c[0] = (int32_t)((b1 + ((int64_t)1 << 31)) >> 32);
+  f->c[1] = (int32_t)((b2 + ((int64_t)1 << 31)) >> 32);
+  f->g = (int32_t)((a + 128) >> 8);
+}
+
+/*Initialize a 2nd order low-pass Bessel filter with the corresponding delay
+   and initial value.
+  value is Q24.*/
+static void od_iir_bessel2_init(od_iir_bessel2 *f, int delay, int32_t value) {
+  od_iir_bessel2_reinit(f, delay);
+  f->y[1] = f->y[0] = f->x[1] = f->x[0] = value;
+}
+
+static int64_t od_iir_bessel2_update(od_iir_bessel2 *f, int32_t x) {
+  int64_t c0;
+  int64_t c1;
+  int64_t g;
+  int64_t x0;
+  int64_t x1;
+  int64_t y0;
+  int64_t y1;
+  int64_t ya;
+  c0 = f->c[0];
+  c1 = f->c[1];
+  g = f->g;
+  x0 = f->x[0];
+  x1 = f->x[1];
+  y0 = f->y[0];
+  y1 = f->y[1];
+  ya = ((x + x0 * 2 + x1) * g + y0 * c0 + y1 * c1 + (1 << 23)) >> 24;
+  f->x[1] = (int32_t)x0;
+  f->x[0] = x;
+  f->y[1] = (int32_t)y0;
+  f->y[0] = (int32_t)ya;
+  return ya;
+}
+
+static void od_enc_rc_reset(od_rc_state *rc) {
+  int64_t npixels;
+  int64_t ibpp;
+  rc->bits_per_frame = (int64_t)(rc->target_bitrate / rc->framerate);
+  /*Insane framerates or frame sizes mean insane bitrates.
+    Let's not get carried away.*/
+  if (rc->bits_per_frame > 0x400000000000LL) {
+    rc->bits_per_frame = (int64_t)0x400000000000LL;
+  } else {
+    if (rc->bits_per_frame < 32) {
+      rc->bits_per_frame = 32;
+    }
+  }
+  rc->reservoir_frame_delay = OD_MAXI(rc->reservoir_frame_delay, 12);
+  rc->reservoir_max = rc->bits_per_frame * rc->reservoir_frame_delay;
+  /*Start with a buffer fullness and fullness target of 50% */
+  rc->reservoir_target = (rc->reservoir_max + 1) >> 1;
+  rc->reservoir_fullness = rc->reservoir_target;
+  /*Pick exponents and initial scales for quantizer selection.*/
+  npixels = rc->frame_width * (int64_t)rc->frame_height;
+  rc->log_npixels = od_blog64(npixels);
+  ibpp = npixels / rc->bits_per_frame;
+  /*All of these initial scale/exp values are from Theora, and have not yet
+     been adapted to Daala, so they're certainly wrong.
+    The B-frame values especially are simply copies of the P-frame values.*/
+  if (ibpp < 1) {
+    rc->exp[OD_I_FRAME] = 59;
+    rc->log_scale[OD_I_FRAME] = od_blog64(1997) - OD_Q57(OD_COEFF_SHIFT);
+  } else if (ibpp < 2) {
+    rc->exp[OD_I_FRAME] = 55;
+    rc->log_scale[OD_I_FRAME] = od_blog64(1604) - OD_Q57(OD_COEFF_SHIFT);
+  } else {
+    rc->exp[OD_I_FRAME] = 48;
+    rc->log_scale[OD_I_FRAME] = od_blog64(834) - OD_Q57(OD_COEFF_SHIFT);
+  }
+  if (ibpp < 4) {
+    rc->exp[OD_P_FRAME] = 100;
+    rc->log_scale[OD_P_FRAME] = od_blog64(2249) - OD_Q57(OD_COEFF_SHIFT);
+  } else if (ibpp < 8) {
+    rc->exp[OD_P_FRAME] = 95;
+    rc->log_scale[OD_P_FRAME] = od_blog64(1751) - OD_Q57(OD_COEFF_SHIFT);
+  } else {
+    rc->exp[OD_P_FRAME] = 73;
+    rc->log_scale[OD_P_FRAME] = od_blog64(1260) - OD_Q57(OD_COEFF_SHIFT);
+  }
+  /*Golden P-frames both use the same log_scale and exp modeling
+     values as regular P-frames and the same scale follower.
+    For convenience in the rate calculation code, we maintain a copy of
+    the scale and exp values in OD_GOLDEN_P_FRAME.*/
+  rc->exp[OD_GOLDEN_P_FRAME] = rc->exp[OD_P_FRAME];
+  rc->log_scale[OD_GOLDEN_P_FRAME] = rc->log_scale[OD_P_FRAME];
+  rc->exp[OD_ALTREF_P_FRAME] = rc->exp[OD_P_FRAME];
+  rc->log_scale[OD_ALTREF_P_FRAME] = rc->log_scale[OD_P_FRAME];
+  /*We clamp the actual I and B frame delays to a minimum of 10 to work within
+     the range of values where later incrementing the delay works as designed.
+    10 is not an exact choice, but rather a good working trade-off.*/
+  rc->inter_p_delay = 10;
+  rc->inter_delay_target = rc->reservoir_frame_delay >> 1;
+  memset(rc->frame_count, 0, sizeof(rc->frame_count));
+  /*Drop-frame tracking is concerned with more than just the basic three frame
+     types.
+    It needs to track boosted and cut subtypes (of which there is only one
+     right now, OD_GOLDEN_P_FRAME). */
+  rc->prev_drop_count[OD_I_FRAME] = 0;
+  rc->log_drop_scale[OD_I_FRAME] = OD_Q57(0);
+  rc->prev_drop_count[OD_P_FRAME] = 0;
+  rc->log_drop_scale[OD_P_FRAME] = OD_Q57(0);
+  rc->prev_drop_count[OD_GOLDEN_P_FRAME] = 0;
+  rc->log_drop_scale[OD_GOLDEN_P_FRAME] = OD_Q57(0);
+  rc->prev_drop_count[OD_ALTREF_P_FRAME] = 0;
+  rc->log_drop_scale[OD_ALTREF_P_FRAME] = OD_Q57(0);
+  /*Set up second order followers, initialized according to corresponding
+     time constants.*/
+  od_iir_bessel2_init(&rc->scalefilter[OD_I_FRAME], 4,
+                      od_q57_to_q24(rc->log_scale[OD_I_FRAME]));
+  od_iir_bessel2_init(&rc->scalefilter[OD_P_FRAME], rc->inter_p_delay,
+                      od_q57_to_q24(rc->log_scale[OD_P_FRAME]));
+  od_iir_bessel2_init(&rc->vfrfilter[OD_I_FRAME], 4,
+                      od_bexp64_q24(rc->log_drop_scale[OD_I_FRAME]));
+  od_iir_bessel2_init(&rc->vfrfilter[OD_P_FRAME], 4,
+                      od_bexp64_q24(rc->log_drop_scale[OD_P_FRAME]));
+  od_iir_bessel2_init(&rc->vfrfilter[OD_GOLDEN_P_FRAME], 4,
+                      od_bexp64_q24(rc->log_drop_scale[OD_GOLDEN_P_FRAME]));
+  od_iir_bessel2_init(&rc->vfrfilter[OD_ALTREF_P_FRAME], 4,
+                      od_bexp64_q24(rc->log_drop_scale[OD_ALTREF_P_FRAME]));
+}
+
+int od_enc_rc_resize(od_rc_state *rc) {
+  /*If encoding has not yet begun, reset the buffer state.*/
+  if (rc->cur_frame == 0) {
+    od_enc_rc_reset(rc);
+  } else {
+    int idt;
+    /*Otherwise, update the bounds on the buffer, but not the current
+       fullness.*/
+    rc->bits_per_frame = (int64_t)(rc->target_bitrate / rc->framerate);
+    /*Insane framerates or frame sizes mean insane bitrates.
+      Let's not get carried away.*/
+    if (rc->bits_per_frame > 0x400000000000LL) {
+      rc->bits_per_frame = (int64_t)0x400000000000LL;
+    } else {
+      if (rc->bits_per_frame < 32) {
+        rc->bits_per_frame = 32;
+      }
+    }
+    rc->reservoir_frame_delay = OD_MAXI(rc->reservoir_frame_delay, 12);
+    rc->reservoir_max = rc->bits_per_frame * rc->reservoir_frame_delay;
+    rc->reservoir_target =
+        ((rc->reservoir_max + 1) >> 1) +
+        ((rc->bits_per_frame + 2) >> 2) *
+            OD_MINI(rc->keyframe_rate, rc->reservoir_frame_delay);
+    /*Update the INTER-frame scale filter delay.
+      We jump to it immediately if we've already seen enough frames; otherwise
+       it is simply set as the new target.*/
+    rc->inter_delay_target = idt = OD_MAXI(rc->reservoir_frame_delay >> 1, 10);
+    if (idt < OD_MINI(rc->inter_p_delay, rc->frame_count[OD_P_FRAME])) {
+      od_iir_bessel2_init(&rc->scalefilter[OD_P_FRAME], idt,
+                          rc->scalefilter[OD_P_FRAME].y[0]);
+      rc->inter_p_delay = idt;
+    }
+  }
+  return 0;
+}
+
+int od_enc_rc_init(od_rc_state *rc, int64_t bitrate, int delay_ms) {
+  if (rc->framerate <= 0) return 1;
+  if (rc->target_bitrate > 0) {
+    /*State has already been initialized; rather than reinitialize,
+      adjust the buffering for the new target rate. */
+    rc->target_bitrate = bitrate;
+    return od_enc_rc_resize(rc);
+  }
+  rc->target_quantizer = 0;
+  rc->target_bitrate = bitrate;
+  rc->rate_bias = 0;
+  if (bitrate > 0) {
+    /* The buffer size is clamped between [12, 256], this interval is short
+       enough to
+       allow reaction, but long enough to allow looking into the next GOP
+       (avoiding
+       the case where the last frames before an I-frame get starved).
+       The 12 frame minimum gives us some chance to distribute bit estimation
+       errors in the worst case. The 256 frame maximum means we'll require 8-10
+       seconds
+       of pre-buffering at 24-30 fps, which is not unreasonable.*/
+    rc->reservoir_frame_delay =
+        (int)OD_MINI((delay_ms / 1000) * rc->framerate, 256);
+    rc->drop_frames = 1;
+    rc->cap_overflow = 1;
+    rc->cap_underflow = 0;
+    rc->twopass_state = 0;
+    od_enc_rc_reset(rc);
+  }
+  return 0;
+}
+
+/*Scale the number of frames by the number of expected drops/duplicates.*/
+static int od_rc_scale_drop(od_rc_state *rc, int frame_type, int nframes) {
+  if (rc->prev_drop_count[frame_type] > 0 ||
+      rc->log_drop_scale[frame_type] > OD_Q57(0)) {
+    int64_t dup_scale;
+    dup_scale = od_bexp64(((rc->log_drop_scale[frame_type] +
+                            od_blog64(rc->prev_drop_count[frame_type] + 1)) >>
+                           1) +
+                          OD_Q57(8));
+    if (dup_scale < nframes << 8) {
+      int dup_scalei;
+      dup_scalei = (int)dup_scale;
+      if (dup_scalei > 0) {
+        nframes = ((nframes << 8) + dup_scalei - 1) / dup_scalei;
+      }
+    } else {
+      nframes = !!nframes;
+    }
+  }
+  return nframes;
+}
+
+/*Closed form version of frame determination code.
+  Used by rate control to predict frame types and subtypes into the future.
+  No side effects, may be called any number of times.
+  Note that it ignores end-of-file conditions; one-pass planning *should*
+   ignore end-of-file. */
+int od_frame_type(od_rc_state *rc, int64_t coding_frame_count, int *is_golden,
+                  int *is_altref, int64_t *ip_count) {
+  int frame_type;
+  if (coding_frame_count == 0) {
+    *is_golden = 1;
+    *is_altref = 1;
+    *ip_count = 0;
+    frame_type = OD_I_FRAME;
+  } else {
+    int keyrate = rc->keyframe_rate;
+    if (rc->closed_gop) {
+      int ip_per_gop;
+      int gop_n;
+      int gop_i;
+      ip_per_gop = (keyrate - 1) / 2;
+      gop_n = coding_frame_count / keyrate;
+      gop_i = coding_frame_count - gop_n * keyrate;
+      *ip_count = gop_n * ip_per_gop + (gop_i > 0) + (gop_i - 1);
+      frame_type = gop_i == 0 ? OD_I_FRAME : OD_P_FRAME;
+    } else {
+      int ip_per_gop;
+      int gop_n;
+      int gop_i;
+      ip_per_gop = (keyrate);
+      gop_n = (coding_frame_count - 1) / keyrate;
+      gop_i = coding_frame_count - gop_n * keyrate - 1;
+      *ip_count = (coding_frame_count > 0) + gop_n * ip_per_gop + (gop_i);
+      frame_type = gop_i / 1 < ip_per_gop - 1 ? OD_P_FRAME : OD_I_FRAME;
+    }
+  }
+  *is_golden =
+      (*ip_count % rc->goldenframe_rate) == 0 || frame_type == OD_I_FRAME;
+  *is_altref = (*ip_count % rc->altref_rate) == 0 || frame_type == OD_I_FRAME;
+  return frame_type;
+}
+
+/*Count frames types forward from the current frame up to but not including
+   the last I-frame in reservoir_frame_delay.
+  If reservoir_frame_delay contains no I-frames (or the current frame is the
+   only I-frame), count all reservoir_frame_delay frames.
+  Returns the number of frames counted.
+  Right now, this implementation is simple, brute-force, and expensive.
+  It is also easy to understand and debug.
+  TODO: replace with a virtual FIFO that keeps running totals as
+   repeating the counting over-and-over will have a performance impact on
+   whole-file 2pass usage.*/
+static int frame_type_count(od_rc_state *rc, int nframes[OD_FRAME_NSUBTYPES]) {
+  int i;
+  int j;
+  int acc[OD_FRAME_NSUBTYPES];
+  int count;
+  int reservoir_frames;
+  int reservoir_frame_delay;
+  memset(nframes, 0, OD_FRAME_NSUBTYPES * sizeof(*nframes));
+  memset(acc, 0, sizeof(acc));
+  count = 0;
+  reservoir_frames = 0;
+#if 1
+  /*Go ahead and count past end-of-stream.
+    We won't nail the exact bitrate on short files that end with a partial
+     GOP, but we also won't [potentially] destroy the quality of the last few
+     frames in that same case when we suddenly find out the stream is ending
+     before the original planning horizon.*/
+  reservoir_frame_delay = rc->reservoir_frame_delay;
+#else
+  /*Don't count past the end of the stream (once we know where end-of-stream
+     is).*/
+  reservoir_frame_delay =
+      rc->end_of_input ? rc->input_size + 1 : rc->reservoir_frame_delay;
+#endif
+  for (i = 0; i < reservoir_frame_delay; i++) {
+    int frame_type;
+    int is_golden;
+    int is_altref;
+    int64_t dummy;
+    frame_type =
+        od_frame_type(rc, rc->cur_frame + i, &is_golden, &is_altref, &dummy);
+    switch (frame_type) {
+      case OD_I_FRAME: {
+        for (j = 0; j < OD_FRAME_NSUBTYPES; j++) nframes[j] += acc[j];
+        reservoir_frames += count;
+        memset(acc, 0, sizeof(acc));
+        acc[OD_I_FRAME] = 1;
+        count = 1;
+        break;
+      }
+      case OD_P_FRAME: {
+        if (is_golden) {
+          ++acc[OD_GOLDEN_P_FRAME];
+          ++count;
+        } else if (is_altref) {
+          ++acc[OD_ALTREF_P_FRAME];
+          ++count;
+        } else {
+          ++acc[OD_P_FRAME];
+          ++count;
+        }
+        break;
+      }
+    }
+  }
+  /*If there were no I-frames at all, or only the first frame was an I-frame,
+     the accumulators never flushed and still contain the counts for the
+     entire buffer.
+    In both these cases, we return these counts.
+    Otherwise, we discard what remains in the accumulators as they contain
+     the counts from and past the last I-frame.*/
+  if (reservoir_frames == 0) {
+    for (i = 0; i < OD_FRAME_NSUBTYPES; i++) nframes[i] = acc[i];
+    reservoir_frames += count;
+  }
+  return reservoir_frames;
+}
+
+static int convert_to_ac_quant(int q, int bit_depth) {
+  return lrint(av1_convert_qindex_to_q(q, bit_depth));
+}
+
+int od_enc_rc_select_quantizers_and_lambdas(od_rc_state *rc,
+                                            int is_golden_frame,
+                                            int is_altref_frame, int frame_type,
+                                            int *bottom_idx, int *top_idx) {
+  int frame_subtype;
+  int64_t log_cur_scale;
+  int lossy_quantizer_min;
+  int lossy_quantizer_max;
+  double mqp_i = OD_MQP_I;
+  double mqp_p = OD_MQP_P;
+  double mqp_gp = OD_MQP_GP;
+  double mqp_ap = OD_MQP_AP;
+  int reservoir_frames;
+  int nframes[OD_FRAME_NSUBTYPES];
+  int32_t mqp_Q12[OD_FRAME_NSUBTYPES];
+  int64_t dqp_Q45[OD_FRAME_NSUBTYPES];
+  /*Verify the closed-form frame type determination code matches what the
+     input queue set.*/
+  /*One pseudo-non-closed-form caveat:
+    Once we've seen end-of-input, the batched frame determination code
+     suppresses the last open-GOP's I-frame (since it would only be
+     useful for the next GOP, which doesn't exist).
+     Thus, don't check one the input queue is drained.*/
+  if (!rc->end_of_input) {
+    int closed_form_type;
+    int closed_form_golden;
+    int closed_form_altref;
+    int64_t closed_form_cur_frame;
+    closed_form_type =
+        od_frame_type(rc, rc->cur_frame, &closed_form_golden,
+                      &closed_form_altref, &closed_form_cur_frame);
+    OD_UNUSED(closed_form_type);
+    OD_UNUSED(is_altref_frame);
+    assert(closed_form_type == frame_type);
+    assert(closed_form_cur_frame == rc->cur_frame);
+    assert(closed_form_altref == is_altref_frame);
+    assert(closed_form_golden == is_golden_frame);
+  }
+
+  log_cur_scale = (int64_t)rc->scalefilter[frame_type].y[0] << 33;
+
+  /*Count the various types and classes of frames.*/
+  reservoir_frames = frame_type_count(rc, nframes);
+  nframes[OD_I_FRAME] = od_rc_scale_drop(rc, OD_I_FRAME, nframes[OD_I_FRAME]);
+  nframes[OD_P_FRAME] = od_rc_scale_drop(rc, OD_P_FRAME, nframes[OD_P_FRAME]);
+  nframes[OD_GOLDEN_P_FRAME] =
+      od_rc_scale_drop(rc, OD_GOLDEN_P_FRAME, nframes[OD_GOLDEN_P_FRAME]);
+  nframes[OD_ALTREF_P_FRAME] =
+      od_rc_scale_drop(rc, OD_ALTREF_P_FRAME, nframes[OD_ALTREF_P_FRAME]);
+
+  switch (rc->twopass_state) {
+    default: break;
+    case 1: {
+      /*Pass 1 mode: use a fixed qi value.*/
+      return rc->firstpass_quant;
+    } break;
+    case 2: {
+      int i;
+      int64_t scale_sum[OD_FRAME_NSUBTYPES];
+      int qti;
+      /*Pass 2 mode: we know exactly how much of each frame type there is in
+         the current buffer window, and have estimates for the scales.*/
+      for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
+        nframes[i] = rc->nframes[i];
+        nframes[i] = rc->nframes[i];
+        scale_sum[i] = rc->scale_sum[i];
+      }
+      /*If we're not using the same frame type as in pass 1 (because someone
+         changed the keyframe interval), remove that scale estimate.
+        We'll add in a replacement for the correct frame type below.*/
+      qti = rc->cur_metrics.frame_type;
+      if (qti != frame_type) {
+        nframes[qti]--;
+        scale_sum[qti] -= od_bexp64_q24(rc->cur_metrics.log_scale);
+      }
+      /*Compute log_scale estimates for each frame type from the pass-1 scales
+         we measured in the current window.*/
+      for (qti = 0; qti < OD_FRAME_NSUBTYPES; qti++) {
+        rc->log_scale[qti] = nframes[qti] > 0
+                                 ? od_blog64(scale_sum[qti]) -
+                                       od_blog64(nframes[qti]) - OD_Q57(24)
+                                 : -rc->log_npixels;
+      }
+      /*If we're not using the same frame type as in pass 1, add a scale
+         estimate for the corresponding frame using the current low-pass
+         filter value.
+        This is mostly to ensure we have a valid estimate even when pass 1 had
+         no frames of this type in the buffer window.
+        TODO: We could also plan ahead and figure out how many keyframes we'll
+         be forced to add in the current buffer window.*/
+      qti = rc->cur_metrics.frame_type;
+      if (qti != frame_type) {
+        int64_t scale;
+        scale = rc->log_scale[frame_type] < OD_Q57(23)
+                    ? od_bexp64(rc->log_scale[frame_type] + OD_Q57(24))
+                    : 0x7FFFFFFFFFFFLL;
+        scale *= nframes[frame_type];
+        nframes[frame_type]++;
+        scale += od_bexp64_q24(log_cur_scale >> 33);
+        rc->log_scale[frame_type] =
+            od_blog64(scale) - od_blog64(nframes[qti]) - OD_Q57(24);
+      } else {
+        log_cur_scale = (int64_t)rc->cur_metrics.log_scale << 33;
+      }
+    } break;
+  }
+
+  /*Quantizer selection sticks to the codable, lossy portion of the quantizer
+    range.*/
+  lossy_quantizer_min = convert_to_ac_quant(rc->minq, rc->bit_depth);
+  lossy_quantizer_max = convert_to_ac_quant(rc->maxq, rc->bit_depth);
+  frame_subtype = frame_type;
+  /*Stash quantizer modulation by frame type.*/
+  mqp_Q12[OD_I_FRAME] = OD_F_Q12(mqp_i);
+  mqp_Q12[OD_P_FRAME] = OD_F_Q12(mqp_p);
+  mqp_Q12[OD_GOLDEN_P_FRAME] = OD_F_Q12(mqp_gp);
+  mqp_Q12[OD_ALTREF_P_FRAME] = OD_F_Q12(mqp_ap);
+  dqp_Q45[OD_I_FRAME] = OD_F_Q45(OD_DQP_I);
+  dqp_Q45[OD_P_FRAME] = OD_F_Q45(OD_DQP_P);
+  dqp_Q45[OD_GOLDEN_P_FRAME] = OD_F_Q45(OD_DQP_GP);
+  dqp_Q45[OD_ALTREF_P_FRAME] = OD_F_Q45(OD_DQP_AP);
+  /*Is rate control active?*/
+  if (rc->target_bitrate <= 0) {
+    /*Rate control is not active; derive quantizer directly from
+      quality parameter and frame type. */
+    /*Can't use the OD_LOSSLESS macro, as it uses state.quantizer to intuit,
+      and we've not set it yet.*/
+    if (rc->quality == 0) {
+      /*Lossless coding requested.*/
+      rc->base_quantizer = 0;
+      rc->target_quantizer = 0;
+    } else {
+      int64_t log_quantizer;
+
+      /* Adjust the modulation constants using the last frame's quantizer. */
+      double mqp_delta = (255 - rc->target_quantizer) / 2000.0f;
+      mqp_i -= mqp_delta;
+      mqp_p += mqp_delta;
+      mqp_gp -= mqp_delta;
+      mqp_Q12[OD_I_FRAME] = OD_F_Q12(mqp_i);
+      mqp_Q12[OD_P_FRAME] = OD_F_Q12(mqp_p);
+      mqp_Q12[OD_GOLDEN_P_FRAME] = OD_F_Q12(mqp_gp);
+      mqp_Q12[OD_ALTREF_P_FRAME] = OD_F_Q12(mqp_ap);
+
+      if (rc->quality == -1) {
+        /*A quality of -1 means quality was unset; use a default.*/
+        rc->base_quantizer = convert_to_ac_quant(10, rc->bit_depth);
+      } else {
+        rc->base_quantizer = convert_to_ac_quant(rc->quality, rc->bit_depth);
+      }
+
+      if (rc->periodic_boosts && !is_golden_frame) {
+        int pattern_rate = (rc->goldenframe_rate >> 1);
+        int dist_to_golden = rc->cur_frame % pattern_rate;
+        int dist_away_golden = pattern_rate - dist_to_golden;
+        int boost = dist_to_golden;
+        if (dist_away_golden > dist_to_golden) boost = dist_away_golden;
+        boost -= pattern_rate;
+        boost *= (rc->base_quantizer) / OD_PERIODIC_BOOST_DIV;
+        rc->base_quantizer = rc->base_quantizer + boost;
+      }
+
+      /*As originally written, qp modulation is applied to the coded quantizer.
+        Because we now have and use a more precise target quantizer for various
+        calculation, that needs to be modulated as well.
+        Calculate what is, effectively, a fractional coded quantizer. */
+      /*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
+      log_quantizer = od_blog64(rc->base_quantizer) - OD_Q57(OD_COEFF_SHIFT);
+      /*log_quantizer to Q21.*/
+      log_quantizer >>= 36;
+      /*scale log quantizer, result is Q33.*/
+      log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
+      /*Add Q33 offset to Q33 log_quantizer.*/
+      log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
+      /*Modulate quantizer according to frame type; result is Q45.*/
+      log_quantizer *= mqp_Q12[frame_subtype];
+      /*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
+      log_quantizer += dqp_Q45[frame_subtype];
+      /*Back to log2 quantizer in Q57.*/
+      log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
+                          OD_LOG_QUANTIZER_EXP_Q12 +
+                      OD_Q57(OD_COEFF_SHIFT);
+      /*Convert Q57 log2 quantizer to unclamped linear target quantizer value.*/
+      rc->target_quantizer = od_bexp64(log_quantizer);
+    }
+  } else {
+    int clamp;
+    int64_t rate_bias;
+    int64_t rate_total;
+    int base_quantizer;
+    int64_t log_quantizer;
+    int qlo;
+    int qhi;
+    int i;
+    /*We clamp the allowed amount of qi change (after initialization).*/
+    clamp = rc->cur_frame > 0;
+    /*Figure out how to re-distribute bits so that we hit our fullness target
+       before the last keyframe in our current buffer window (after the current
+       frame), or the end of the buffer window, whichever comes first.*/
+    /*Single pass only right now.*/
+    /*If we've been missing our target, add a penalty term.*/
+    rate_bias = (rc->rate_bias / (rc->cur_frame + 1000)) * reservoir_frames;
+    /*rate_total is the total bits available over the next
+       reservoir_frames frames.*/
+    rate_total = rc->reservoir_fullness - rc->reservoir_target + rate_bias +
+                 reservoir_frames * rc->bits_per_frame;
+    /*Find a target quantizer that meets our rate target for the specific mix
+       of frame types we'll have over the next frame_delay frames.
+      We model the rate<->quantizer relationship as:
+       rate = scale*(quantizer**-exp)
+      In this case, we have our desired rate, an exponent selected in setup,
+       and a scale that's been measured over our frame history, so we're
+       solving for the quantizer.
+      Exponentiation with arbitrary exponents is expensive, so we work in
+       the binary log domain (binary exp and log aren't too bad):
+       rate = e2(log2_scale - log2_quantizer * exp)
+      There's no easy closed form solution, so we bisection search for it.*/
+    /*We do not currently allow rate control to select lossless encoding.*/
+    qlo = 1;
+    /*If there's a quality specified, it's used to select the
+       coarsest base quantizer we can select.
+      Otherwise we can use up to and including the coarsest codable
+       quantizer.*/
+    if (rc->quality > 0)
+      qhi = convert_to_ac_quant(rc->quality, rc->bit_depth);
+    else
+      qhi = lossy_quantizer_max;
+    base_quantizer = (qlo + qhi) >> 1;
+    while (qlo < qhi) {
+      volatile int64_t log_base_quantizer;
+      int64_t diff;
+      int64_t bits;
+      /*Count bits contributed by each frame type using the model.*/
+      bits = 0;
+      log_base_quantizer = od_blog64(base_quantizer);
+      for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
+        /*Modulate base quantizer by frame type.*/
+        /*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
+        log_quantizer = log_base_quantizer - OD_Q57(OD_COEFF_SHIFT);
+        /*log_quantizer to Q21.*/
+        log_quantizer >>= 36;
+        /*scale log quantizer, result is Q33.*/
+        log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
+        /*Add Q33 offset to Q33 log_quantizer.*/
+        log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
+        /*Modulate quantizer according to frame type; result is Q45.*/
+        log_quantizer *= mqp_Q12[i];
+        /*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
+        log_quantizer += dqp_Q45[i];
+        /*Back to log2 quantizer in Q57.*/
+        log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
+                            OD_LOG_QUANTIZER_EXP_Q12 +
+                        OD_Q57(OD_COEFF_SHIFT);
+        /*Clamp modulated quantizer values.*/
+        log_quantizer = OD_CLAMPI(od_blog64(lossy_quantizer_min), log_quantizer,
+                                  od_blog64(lossy_quantizer_max));
+        /* All the fields here are Q57 except for the exponent which is Q6.*/
+        bits += nframes[i] * od_bexp64(rc->log_scale[i] + rc->log_npixels -
+                                       (log_quantizer >> 6) * rc->exp[i]);
+      }
+      diff = bits - rate_total;
+      if (diff > 0) {
+        qlo = base_quantizer + 1;
+      } else if (diff < 0) {
+        qhi = base_quantizer - 1;
+      } else {
+        break;
+      }
+      base_quantizer = (qlo + qhi) >> 1;
+    }
+    /*If this was not one of the initial frames, limit the change in base
+       quantizer to within [0.8*Q,1.2*Q], where Q is the previous frame's
+       base quantizer.*/
+    if (clamp) {
+      base_quantizer = OD_CLAMPI((rc->base_quantizer * 0x0CCCD + 0x8000) >> 16,
+                                 base_quantizer,
+                                 (rc->base_quantizer * 0x13333 + 0x8000) >> 16);
+    }
+    /*Modulate chosen base quantizer to produce target quantizer.*/
+    log_quantizer = od_blog64(base_quantizer);
+    /*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
+    log_quantizer -= OD_Q57(OD_COEFF_SHIFT);
+    /*log_quantizer to Q21.*/
+    log_quantizer >>= 36;
+    /*scale log quantizer, result is Q33.*/
+    log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
+    /*Add Q33 offset to Q33 log_quantizer.*/
+    log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
+    /*Modulate quantizer according to frame type; result is Q45.*/
+    log_quantizer *= mqp_Q12[frame_subtype];
+    /*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
+    log_quantizer += dqp_Q45[frame_subtype];
+    /*Back to log2 quantizer in Q57.*/
+    log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
+                        OD_LOG_QUANTIZER_EXP_Q12 +
+                    OD_Q57(OD_COEFF_SHIFT);
+    /*Clamp modulated quantizer values.*/
+    log_quantizer = OD_CLAMPI(od_blog64(lossy_quantizer_min), log_quantizer,
+                              od_blog64(lossy_quantizer_max));
+    /*The above allocation looks only at the total rate we'll accumulate in
+       the next reservoir_frame_delay frames.
+      However we could overflow the bit reservoir on the very next frame, so
+       check for that here if we're not using a soft target.*/
+    if (rc->cap_overflow) {
+      int64_t margin;
+      int64_t soft_limit;
+      int64_t log_soft_limit;
+      int64_t log_scale_pixels;
+      int64_t exp;
+      int64_t log_qexp;
+      /*Allow 3% of the buffer for prediction error.
+        This should be plenty, and we don't mind if we go a bit over; we only
+         want to keep these bits from being completely wasted.*/
+      margin = (rc->reservoir_max + 31) >> 5;
+      /*We want to use at least this many bits next frame.*/
+      soft_limit = rc->reservoir_fullness + rc->bits_per_frame -
+                   (rc->reservoir_max - margin);
+      log_soft_limit = od_blog64(soft_limit);
+      /*If we're predicting we won't use that many bits...*/
+      log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
+      exp = rc->exp[frame_subtype];
+      log_qexp = (log_quantizer >> 6) * exp;
+      if (log_scale_pixels - log_qexp < log_soft_limit) {
+        /*Scale the adjustment based on how far into the margin we are.*/
+        log_qexp += ((log_scale_pixels - log_soft_limit - log_qexp) >> 32) *
+                    (OD_MINI(margin, soft_limit) << 32) / margin;
+        log_quantizer = (((log_qexp + (exp >> 1)) / exp) << 6);
+      }
+    }
+    /*We just checked we don't overflow the reservoir next frame, now check
+       we don't underflow and bust the budget (when not using a soft target).
+      Disabled when a quality bound is set; if we saturate quantizer to the
+       maximum possible size when we have a limiting max quality, the
+       resulting lambda can cause strange behavior.*/
+    if (rc->quality == -1) {
+      int64_t exp;
+      int64_t log_qexp;
+      int64_t log_scale_pixels;
+      int64_t log_hard_limit;
+      /*Compute the maximum number of bits we can use in the next frame.
+        Allow 50% of the rate for a single frame for prediction error.
+        This may not be enough for keyframes or sudden changes in
+         complexity.*/
+      log_hard_limit =
+          od_blog64(rc->reservoir_fullness + (rc->bits_per_frame >> 1));
+      /*If we're predicting we'll use more than this...*/
+      log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
+      exp = rc->exp[frame_subtype];
+      log_qexp = (log_quantizer >> 6) * exp;
+      if (log_scale_pixels - log_qexp > log_hard_limit) {
+        /*Force the target to hit our limit exactly.*/
+        log_qexp = log_scale_pixels - log_hard_limit;
+        log_quantizer = (log_qexp + (exp >> 1)) / exp << 6;
+        /*If that target is unreasonable, oh well; we'll have to drop.*/
+        log_quantizer = OD_MAXI(log_quantizer, od_blog64(lossy_quantizer_max));
+      }
+    }
+    /*Compute a final estimate of the number of bits we plan to use, update
+       the running rate bias measurement.*/
+    {
+      int64_t log_qexp;
+      int64_t log_scale_pixels;
+      log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
+      log_qexp = (log_quantizer >> 6) * rc->exp[frame_subtype];
+      rc->rate_bias += od_bexp64(log_scale_pixels - log_qexp);
+    }
+    rc->target_quantizer = od_bexp64(log_quantizer);
+    /*The various cappings and adjustments may have altered the log_quantizer
+       target significantly.
+      We can either update the base quantizer to be consistent with the
+       target or let it track separately.
+      Theora behavior effectively keeps them consistent, as it regenerates
+       the effective base quantizer from the target each frame rather than
+       saving both.
+      For Daala, it's easier to allow them to track separately.
+      For now, allow them to track separately and see how it behaves.*/
+    rc->base_quantizer = base_quantizer;
+  }
+  *bottom_idx = lossy_quantizer_min;
+  *top_idx = lossy_quantizer_max;
+  rc->target_quantizer = av1_qindex_from_ac(
+      OD_CLAMPI(lossy_quantizer_min, rc->target_quantizer, lossy_quantizer_max),
+      rc->bit_depth);
+  return rc->target_quantizer;
+}
+
+int od_enc_rc_update_state(od_rc_state *rc, int64_t bits, int is_golden_frame,
+                           int is_altref_frame, int frame_type, int droppable) {
+  int dropped;
+  dropped = 0;
+  /*Update rate control only if rate control is active.*/
+  if (rc->target_bitrate > 0) {
+    int64_t log_scale;
+    int frame_subtype;
+    frame_subtype = frame_type;
+    /*Track non-golden and golden P frame drops separately.*/
+    if (is_golden_frame && frame_type == OD_P_FRAME)
+      frame_subtype = OD_GOLDEN_P_FRAME;
+    else if (is_altref_frame && frame_type == OD_P_FRAME)
+      frame_subtype = OD_ALTREF_P_FRAME;
+    if (bits <= 0) {
+      /*We didn't code any blocks in this frame.*/
+      log_scale = OD_Q57(-64);
+      bits = 0;
+      ++rc->prev_drop_count[frame_subtype];
+    } else {
+      int64_t log_bits;
+      int64_t log_qexp;
+      /*Compute the estimated scale factor for this frame type.*/
+      log_bits = od_blog64(bits);
+      log_qexp = od_blog64(rc->target_quantizer);
+      log_qexp = (log_qexp >> 6) * (rc->exp[frame_type]);
+      log_scale = OD_MINI(log_bits - rc->log_npixels + log_qexp, OD_Q57(16));
+    }
+
+    switch (rc->twopass_state) {
+      case 1: {
+        int golden, altref;
+        int64_t ipc;
+        rc->cur_metrics.frame_type =
+            od_frame_type(rc, rc->cur_frame, &golden, &altref, &ipc);
+        /*Pass 1 mode: save the metrics for this frame.*/
+        rc->cur_metrics.log_scale = od_q57_to_q24(log_scale);
+      } break;
+      case 2: {
+        /*Pass 2 mode:*/
+        int m_frame_type = rc->cur_metrics.frame_type;
+        rc->nframes[m_frame_type]--;
+        rc->scale_sum[m_frame_type] -= od_bexp64_q24(rc->cur_metrics.log_scale);
+      } break;
+    }
+
+    if (bits > 0) {
+      od_iir_bessel2 *f;
+      /*If this is the first example of the given frame type we've
+         seen, we immediately replace the default scale factor guess
+         with the estimate we just computed using the first frame.*/
+      if (rc->frame_count[frame_type] == 0) {
+        f = rc->scalefilter + frame_type;
+        f->y[1] = f->y[0] = f->x[1] = f->x[0] = od_q57_to_q24(log_scale);
+        rc->log_scale[frame_type] = log_scale;
+      } else {
+        /*Lengthen the time constant for the inter filters as we collect more
+           frame statistics, until we reach our target.*/
+        if (frame_type != OD_I_FRAME &&
+            rc->inter_p_delay < rc->inter_delay_target &&
+            rc->frame_count[frame_type] >= rc->inter_p_delay) {
+          od_iir_bessel2_reinit(&rc->scalefilter[frame_type],
+                                ++rc->inter_p_delay);
+        }
+        /*Update the low-pass scale filter for this frame type
+           regardless of whether or not we drop this frame.*/
+        rc->log_scale[frame_type] =
+            od_iir_bessel2_update(rc->scalefilter + frame_type,
+                                  od_q57_to_q24(log_scale))
+            << 33;
+      }
+      /*If this frame busts our budget, it must be dropped.*/
+      if (droppable && rc->reservoir_fullness + rc->bits_per_frame < bits) {
+        ++rc->prev_drop_count[frame_subtype];
+        bits = 0;
+        dropped = 1;
+      } else {
+        uint32_t drop_count;
+        /*Update a low-pass filter to estimate the "real" frame rate taking
+           drops into account.
+          This is only done if the frame is coded, as it needs the final
+           count of dropped frames.*/
+        drop_count = rc->prev_drop_count[frame_subtype] + 1;
+        if (drop_count > 0x7F) {
+          drop_count = 0x7FFFFFFF;
+        } else {
+          drop_count <<= 24;
+        }
+        rc->log_drop_scale[frame_subtype] =
+            od_blog64(od_iir_bessel2_update(rc->vfrfilter + frame_subtype,
+                                            drop_count)) -
+            OD_Q57(24);
+        /*Zero the drop count for this frame.
+          It will be increased if we drop frames.*/
+        rc->prev_drop_count[frame_subtype] = 0;
+      }
+      /*Increment the frame count for filter adaptation purposes.*/
+      if (!rc->twopass_state) rc->frame_count[frame_type]++;
+    }
+    rc->reservoir_fullness += rc->bits_per_frame - bits;
+    /*If we're too quick filling the buffer and overflow is capped,
+      that rate is lost forever.*/
+    if (rc->cap_overflow && rc->reservoir_fullness > rc->reservoir_max) {
+      rc->reservoir_fullness = rc->reservoir_max;
+    }
+    /*If we're too quick draining the buffer and underflow is capped,
+      don't try to make up that rate later.*/
+    if (rc->cap_underflow && rc->reservoir_fullness < 0) {
+      rc->reservoir_fullness = 0;
+    }
+    /*Adjust the bias for the real bits we've used.*/
+    rc->rate_bias -= bits;
+  }
+  return dropped;
+}
+
+static INLINE void od_rc_buffer_val(od_rc_state *rc, int64_t val, int bytes) {
+  while (bytes-- > 0) {
+    rc->twopass_buffer[rc->twopass_buffer_bytes++] = (uint8_t)(val & 0xFF);
+    val >>= 8;
+  }
+}
+
+static INLINE int64_t od_rc_unbuffer_val(od_rc_state *rc, int bytes) {
+  int64_t ret = 0;
+  int shift = 0;
+  while (bytes-- > 0) {
+    ret |= ((int64_t)rc->twopass_buffer[rc->twopass_buffer_bytes++]) << shift;
+    shift += 8;
+  }
+  return ret;
+}
+
+int od_enc_rc_2pass_out(od_rc_state *rc, struct aom_codec_pkt_list *pkt_list,
+                        int summary) {
+  int i;
+  struct aom_codec_cx_pkt pkt;
+  rc->twopass_buffer = rc->firstpass_buffer;
+  rc->twopass_buffer_bytes = 0;
+  if (!rc->twopass_state) {
+    rc->twopass_state = 1;
+    for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
+      rc->frame_count[i] = 0;
+      rc->exp[i] = 0;
+      rc->scale_sum[i] = 0;
+    }
+  }
+  if (summary) {
+    od_rc_buffer_val(rc, OD_RC_2PASS_MAGIC, 4);
+    od_rc_buffer_val(rc, OD_RC_2PASS_VERSION, 1);
+    for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
+      od_rc_buffer_val(rc, rc->frame_count[i], 4);
+      od_rc_buffer_val(rc, rc->exp[i], 4);
+      od_rc_buffer_val(rc, rc->scale_sum[i], 8);
+    }
+  } else {
+    int frame_type = rc->cur_metrics.frame_type;
+    rc->scale_sum[frame_type] += od_bexp64_q24(rc->cur_metrics.log_scale);
+    rc->frame_count[frame_type]++;
+    od_rc_buffer_val(rc, rc->cur_metrics.frame_type, 1);
+    od_rc_buffer_val(rc, rc->cur_metrics.log_scale, 4);
+  }
+  pkt.data.twopass_stats.buf = rc->firstpass_buffer;
+  pkt.data.twopass_stats.sz = rc->twopass_buffer_bytes;
+  pkt.kind = AOM_CODEC_STATS_PKT;
+  aom_codec_pkt_list_add(pkt_list, &pkt);
+  return 0;
+}
+
+int od_enc_rc_2pass_in(od_rc_state *rc) {
+  /* Enable pass 2 mode if this is the first call. */
+  if (rc->twopass_state == 0) {
+    uint32_t i, total_frames = 0;
+
+    if (!rc->twopass_allframes_buf ||
+        rc->twopass_allframes_buf_size < OD_RC_2PASS_MIN)
+      return -1;
+
+    /* Find summary packet at the end */
+    rc->twopass_buffer = rc->twopass_allframes_buf;
+    rc->twopass_buffer +=
+        rc->twopass_allframes_buf_size - OD_RC_2PASS_SUMMARY_SZ;
+    rc->twopass_buffer_bytes = 0;
+
+    if (od_rc_unbuffer_val(rc, 4) != OD_RC_2PASS_MAGIC) return -1;
+    if (od_rc_unbuffer_val(rc, 1) != OD_RC_2PASS_VERSION) return -1;
+
+    for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
+      rc->frame_count[i] = od_rc_unbuffer_val(rc, 4);
+      rc->exp[i] = od_rc_unbuffer_val(rc, 4);
+      rc->scale_sum[i] = od_rc_unbuffer_val(rc, 8);
+      rc->nframes[i] = rc->frame_count[i];
+      total_frames += rc->frame_count[i];
+    }
+
+    if (total_frames < 1) return -1;
+
+    if (total_frames * OD_RC_2PASS_PACKET_SZ > rc->twopass_allframes_buf_size)
+      return -1;
+
+    od_enc_rc_reset(rc);
+
+    /* Everything looks ok */
+    rc->twopass_buffer = rc->twopass_allframes_buf;
+    rc->twopass_state = 2;
+    rc->twopass_buffer_bytes = 0;
+  }
+
+  rc->cur_metrics.frame_type = od_rc_unbuffer_val(rc, 1);
+  rc->cur_metrics.log_scale = od_rc_unbuffer_val(rc, 4);
+
+  return 0;
+}
author	trav90 <travawine@palemoon.org>	2018-10-15 21:45:30 -0500
committer	trav90 <travawine@palemoon.org>	2018-10-15 21:45:30 -0500
commit	68569dee1416593955c1570d638b3d9250b33012 (patch)
tree	d960f017cd7eba3f125b7e8a813789ee2e076310 /third_party/aom/av1/encoder/ratectrl_xiph.c
parent	07c17b6b98ed32fcecff15c083ab0fd878de3cf0 (diff)
download	UXP-68569dee1416593955c1570d638b3d9250b33012.tar UXP-68569dee1416593955c1570d638b3d9250b33012.tar.gz UXP-68569dee1416593955c1570d638b3d9250b33012.tar.lz UXP-68569dee1416593955c1570d638b3d9250b33012.tar.xz UXP-68569dee1416593955c1570d638b3d9250b33012.zip